vortex_vector/binaryview/
view.rs1use std::fmt;
7use std::hash::Hash;
8use std::hash::Hasher;
9use std::ops::Range;
10
11use static_assertions::assert_eq_align;
12use static_assertions::assert_eq_size;
13use vortex_buffer::ByteBuffer;
14use vortex_error::VortexResult;
15use vortex_error::VortexUnwrap;
16use vortex_error::vortex_ensure;
17use vortex_error::vortex_err;
18
19#[derive(Clone, Copy)]
24#[repr(C, align(16))]
25pub union BinaryView {
26 pub(crate) le_bytes: [u8; 16],
29
30 pub(crate) inlined: Inlined,
32
33 pub(crate) _ref: Ref,
35}
36
37assert_eq_align!(BinaryView, u128);
38assert_eq_size!(BinaryView, [u8; 16]);
39assert_eq_size!(Inlined, [u8; 16]);
40assert_eq_size!(Ref, [u8; 16]);
41
42#[derive(Clone, Copy, Debug, PartialEq, Eq)]
44#[repr(C, align(8))]
45pub struct Inlined {
46 pub size: u32,
48 pub data: [u8; BinaryView::MAX_INLINED_SIZE],
50}
51
52impl Inlined {
53 #[inline]
55 fn new<const N: usize>(value: &[u8]) -> Self {
56 debug_assert_eq!(value.len(), N);
57 let mut inlined = Self {
58 size: N.try_into().vortex_unwrap(),
59 data: [0u8; BinaryView::MAX_INLINED_SIZE],
60 };
61 inlined.data[..N].copy_from_slice(&value[..N]);
62 inlined
63 }
64
65 #[inline]
67 pub fn value(&self) -> &[u8] {
68 &self.data[0..(self.size as usize)]
69 }
70}
71
/// The form of a view whose value lives in a separate buffer.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(8))]
pub struct Ref {
    /// Length of the referenced value in bytes.
    pub size: u32,
    /// Copy of the first four bytes of the referenced value.
    pub prefix: [u8; 4],
    /// Index of the buffer that holds the value.
    pub buffer_index: u32,
    /// Byte offset of the value inside that buffer.
    pub offset: u32,
}

impl Ref {
    /// Returns the byte range `offset..offset + size` occupied by the value
    /// inside its buffer.
    ///
    /// The end is computed in `usize` rather than `u32`, so a view whose
    /// `offset + size` exceeds `u32::MAX` yields a (necessarily out-of-bounds)
    /// range instead of overflowing. The addition saturates, so it cannot
    /// overflow even where `usize` is only 32 bits wide.
    #[inline]
    pub fn as_range(&self) -> Range<usize> {
        let start = self.offset as usize;
        let end = start.saturating_add(self.size as usize);
        start..end
    }

    /// Returns a copy of this reference that points at a different
    /// `buffer_index` and `offset`, keeping `size` and `prefix` unchanged.
    #[inline]
    pub fn with_buffer_and_offset(&self, buffer_index: u32, offset: u32) -> Ref {
        Self {
            size: self.size,
            prefix: self.prefix,
            buffer_index,
            offset,
        }
    }
}
104
105impl PartialEq for BinaryView {
106 fn eq(&self, other: &Self) -> bool {
107 let a = unsafe { std::mem::transmute::<&BinaryView, &u128>(self) };
108 let b = unsafe { std::mem::transmute::<&BinaryView, &u128>(other) };
109 a == b
110 }
111}
112impl Eq for BinaryView {}
113
114impl Hash for BinaryView {
115 fn hash<H: Hasher>(&self, state: &mut H) {
116 unsafe { std::mem::transmute::<&BinaryView, &u128>(self) }.hash(state);
117 }
118}
119
120impl Default for BinaryView {
121 fn default() -> Self {
122 Self::make_view(&[], 0, 0)
123 }
124}
125
126impl BinaryView {
127 pub const MAX_INLINED_SIZE: usize = 12;
129
130 #[inline(never)]
138 pub fn make_view(value: &[u8], block: u32, offset: u32) -> Self {
139 match value.len() {
140 0 => Self {
141 inlined: Inlined::new::<0>(value),
142 },
143 1 => Self {
144 inlined: Inlined::new::<1>(value),
145 },
146 2 => Self {
147 inlined: Inlined::new::<2>(value),
148 },
149 3 => Self {
150 inlined: Inlined::new::<3>(value),
151 },
152 4 => Self {
153 inlined: Inlined::new::<4>(value),
154 },
155 5 => Self {
156 inlined: Inlined::new::<5>(value),
157 },
158 6 => Self {
159 inlined: Inlined::new::<6>(value),
160 },
161 7 => Self {
162 inlined: Inlined::new::<7>(value),
163 },
164 8 => Self {
165 inlined: Inlined::new::<8>(value),
166 },
167 9 => Self {
168 inlined: Inlined::new::<9>(value),
169 },
170 10 => Self {
171 inlined: Inlined::new::<10>(value),
172 },
173 11 => Self {
174 inlined: Inlined::new::<11>(value),
175 },
176 12 => Self {
177 inlined: Inlined::new::<12>(value),
178 },
179 _ => Self {
180 _ref: Ref {
181 size: u32::try_from(value.len()).vortex_unwrap(),
182 prefix: value[0..4].try_into().vortex_unwrap(),
183 buffer_index: block,
184 offset,
185 },
186 },
187 }
188 }
189
190 #[inline]
192 pub fn empty_view() -> Self {
193 Self { le_bytes: [0; 16] }
194 }
195
196 #[inline]
202 pub fn new_inlined(value: &[u8]) -> Self {
203 assert!(
204 value.len() <= Self::MAX_INLINED_SIZE,
205 "expected inlined value to be <= 12 bytes, was {}",
206 value.len()
207 );
208
209 Self::make_view(value, 0, 0)
210 }
211
212 #[inline]
214 pub fn len(&self) -> u32 {
215 unsafe { self.inlined.size }
216 }
217
218 #[inline]
220 pub fn is_empty(&self) -> bool {
221 self.len() == 0
222 }
223
224 #[inline]
226 #[expect(
227 clippy::cast_possible_truncation,
228 reason = "MAX_INLINED_SIZE is a small constant"
229 )]
230 pub fn is_inlined(&self) -> bool {
231 self.len() <= (Self::MAX_INLINED_SIZE as u32)
232 }
233
234 pub fn as_inlined(&self) -> &Inlined {
236 debug_assert!(self.is_inlined());
237 unsafe { &self.inlined }
238 }
239
240 pub fn as_view(&self) -> &Ref {
242 debug_assert!(!self.is_inlined());
243 unsafe { &self._ref }
244 }
245
246 pub fn as_view_mut(&mut self) -> &mut Ref {
248 unsafe { &mut self._ref }
249 }
250
251 pub fn as_u128(&self) -> u128 {
253 unsafe { u128::from_le_bytes(self.le_bytes) }
255 }
256}
257
258impl From<u128> for BinaryView {
259 fn from(value: u128) -> Self {
260 BinaryView {
261 le_bytes: value.to_le_bytes(),
262 }
263 }
264}
265
266impl From<Ref> for BinaryView {
267 fn from(value: Ref) -> Self {
268 BinaryView { _ref: value }
269 }
270}
271
272impl fmt::Debug for BinaryView {
273 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
274 let mut s = f.debug_struct("BinaryView");
275 if self.is_inlined() {
276 s.field("inline", &self.as_inlined());
277 } else {
278 s.field("ref", &self.as_view());
279 }
280 s.finish()
281 }
282}
283
284pub(super) fn validate_views<ValidateFn, IsValidFn>(
290 views: &[BinaryView],
291 buffers: impl AsRef<[ByteBuffer]>,
292 validity: IsValidFn,
293 validator: ValidateFn,
294) -> VortexResult<()>
295where
296 IsValidFn: Fn(usize) -> bool,
297 ValidateFn: Fn(&[u8]) -> bool,
298{
299 let buffers = buffers.as_ref();
300 for (idx, &view) in views.iter().enumerate() {
301 if !validity(idx) {
302 continue;
303 }
304
305 if view.is_inlined() {
306 let bytes = &unsafe { view.inlined }.data[..view.len() as usize];
308 vortex_ensure!(
309 validator(bytes),
310 "view at index {idx}: inlined bytes failed utf-8 validation"
311 );
312 } else {
313 let view = view.as_view();
315 let buf_index = view.buffer_index as usize;
316 let start_offset = view.offset as usize;
317 let end_offset = start_offset.saturating_add(view.size as usize);
318
319 let buf = buffers.get(buf_index).ok_or_else(||
320 vortex_err!("view at index {idx} references invalid buffer: {buf_index} out of bounds for BinaryViewVector with {} buffers",
321 buffers.len()))?;
322
323 vortex_ensure!(
324 start_offset < buf.len(),
325 "start offset {start_offset} out of bounds for buffer {buf_index} with size {}",
326 buf.len(),
327 );
328
329 vortex_ensure!(
330 end_offset <= buf.len(),
331 "end offset {end_offset} out of bounds for buffer {buf_index} with size {}",
332 buf.len(),
333 );
334
335 let bytes = &buf[start_offset..end_offset];
337 vortex_ensure!(
338 view.prefix == bytes[..4],
339 "VarBinView prefix does not match full string"
340 );
341
342 vortex_ensure!(
344 validator(bytes),
345 "view at index {idx}: outlined bytes failed utf-8 validation"
346 );
347 }
348 }
349
350 Ok(())
351}