vortex_vector/binaryview/
view.rs1use std::fmt;
7use std::hash::{Hash, Hasher};
8use std::ops::Range;
9
10use static_assertions::{assert_eq_align, assert_eq_size};
11use vortex_buffer::ByteBuffer;
12use vortex_error::{VortexResult, VortexUnwrap, vortex_ensure, vortex_err};
13
14#[derive(Clone, Copy)]
19#[repr(C, align(16))]
20pub union BinaryView {
21 pub(crate) le_bytes: [u8; 16],
24
25 pub(crate) inlined: Inlined,
27
28 pub(crate) _ref: Ref,
30}
31
32assert_eq_align!(BinaryView, u128);
33assert_eq_size!(BinaryView, [u8; 16]);
34assert_eq_size!(Inlined, [u8; 16]);
35assert_eq_size!(Ref, [u8; 16]);
36
37#[derive(Clone, Copy, Debug, PartialEq, Eq)]
39#[repr(C, align(8))]
40pub struct Inlined {
41 pub size: u32,
43 pub data: [u8; BinaryView::MAX_INLINED_SIZE],
45}
46
47impl Inlined {
48 fn new<const N: usize>(value: &[u8]) -> Self {
50 debug_assert_eq!(value.len(), N);
51 let mut inlined = Self {
52 size: N.try_into().vortex_unwrap(),
53 data: [0u8; BinaryView::MAX_INLINED_SIZE],
54 };
55 inlined.data[..N].copy_from_slice(&value[..N]);
56 inlined
57 }
58
59 #[inline]
61 pub fn value(&self) -> &[u8] {
62 &self.data[0..(self.size as usize)]
63 }
64}
65
/// The layout of a [`BinaryView`] whose value lives in a separate buffer.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(8))]
pub struct Ref {
    /// Length of the referenced value in bytes.
    pub size: u32,

    /// The first four bytes of the referenced value.
    pub prefix: [u8; 4],

    /// Index of the buffer that holds the value.
    pub buffer_index: u32,

    /// Byte offset of the value inside that buffer.
    pub offset: u32,
}

impl Ref {
    /// Returns the byte range `offset..offset + size` covered by this reference.
    ///
    /// The end is computed in `usize` rather than `u32`: `offset + size` can
    /// exceed `u32::MAX`, which would panic in debug builds and silently wrap in
    /// release builds if added as `u32`. The addition saturates so that targets
    /// with a 32-bit `usize` cannot wrap either.
    #[inline]
    pub fn as_range(&self) -> Range<usize> {
        let start = self.offset as usize;
        let end = start.saturating_add(self.size as usize);
        start..end
    }

    /// Returns a copy of this reference pointing at a different buffer and offset,
    /// keeping the original `size` and `prefix`.
    #[inline]
    pub fn with_buffer_and_offset(&self, buffer_index: u32, offset: u32) -> Ref {
        Self {
            size: self.size,
            prefix: self.prefix,
            buffer_index,
            offset,
        }
    }
}
98
99impl PartialEq for BinaryView {
100 fn eq(&self, other: &Self) -> bool {
101 let a = unsafe { std::mem::transmute::<&BinaryView, &u128>(self) };
102 let b = unsafe { std::mem::transmute::<&BinaryView, &u128>(other) };
103 a == b
104 }
105}
106impl Eq for BinaryView {}
107
108impl Hash for BinaryView {
109 fn hash<H: Hasher>(&self, state: &mut H) {
110 unsafe { std::mem::transmute::<&BinaryView, &u128>(self) }.hash(state);
111 }
112}
113
114impl Default for BinaryView {
115 fn default() -> Self {
116 Self::make_view(&[], 0, 0)
117 }
118}
119
120impl BinaryView {
121 pub const MAX_INLINED_SIZE: usize = 12;
123
124 #[inline(never)]
132 pub fn make_view(value: &[u8], block: u32, offset: u32) -> Self {
133 match value.len() {
134 0 => Self {
135 inlined: Inlined::new::<0>(value),
136 },
137 1 => Self {
138 inlined: Inlined::new::<1>(value),
139 },
140 2 => Self {
141 inlined: Inlined::new::<2>(value),
142 },
143 3 => Self {
144 inlined: Inlined::new::<3>(value),
145 },
146 4 => Self {
147 inlined: Inlined::new::<4>(value),
148 },
149 5 => Self {
150 inlined: Inlined::new::<5>(value),
151 },
152 6 => Self {
153 inlined: Inlined::new::<6>(value),
154 },
155 7 => Self {
156 inlined: Inlined::new::<7>(value),
157 },
158 8 => Self {
159 inlined: Inlined::new::<8>(value),
160 },
161 9 => Self {
162 inlined: Inlined::new::<9>(value),
163 },
164 10 => Self {
165 inlined: Inlined::new::<10>(value),
166 },
167 11 => Self {
168 inlined: Inlined::new::<11>(value),
169 },
170 12 => Self {
171 inlined: Inlined::new::<12>(value),
172 },
173 _ => Self {
174 _ref: Ref {
175 size: u32::try_from(value.len()).vortex_unwrap(),
176 prefix: value[0..4].try_into().vortex_unwrap(),
177 buffer_index: block,
178 offset,
179 },
180 },
181 }
182 }
183
184 #[inline]
186 pub fn empty_view() -> Self {
187 Self { le_bytes: [0; 16] }
188 }
189
190 #[inline]
196 pub fn new_inlined(value: &[u8]) -> Self {
197 assert!(
198 value.len() <= Self::MAX_INLINED_SIZE,
199 "expected inlined value to be <= 12 bytes, was {}",
200 value.len()
201 );
202
203 Self::make_view(value, 0, 0)
204 }
205
206 #[inline]
208 pub fn len(&self) -> u32 {
209 unsafe { self.inlined.size }
210 }
211
212 #[inline]
214 pub fn is_empty(&self) -> bool {
215 self.len() == 0
216 }
217
218 #[inline]
220 #[allow(clippy::cast_possible_truncation)]
221 pub fn is_inlined(&self) -> bool {
222 self.len() <= (Self::MAX_INLINED_SIZE as u32)
223 }
224
225 pub fn as_inlined(&self) -> &Inlined {
227 debug_assert!(self.is_inlined());
228 unsafe { &self.inlined }
229 }
230
231 pub fn as_view(&self) -> &Ref {
233 debug_assert!(!self.is_inlined());
234 unsafe { &self._ref }
235 }
236
237 pub fn as_view_mut(&mut self) -> &mut Ref {
239 unsafe { &mut self._ref }
240 }
241
242 pub fn as_u128(&self) -> u128 {
244 unsafe { u128::from_le_bytes(self.le_bytes) }
246 }
247}
248
249impl From<u128> for BinaryView {
250 fn from(value: u128) -> Self {
251 BinaryView {
252 le_bytes: value.to_le_bytes(),
253 }
254 }
255}
256
257impl From<Ref> for BinaryView {
258 fn from(value: Ref) -> Self {
259 BinaryView { _ref: value }
260 }
261}
262
263impl fmt::Debug for BinaryView {
264 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
265 let mut s = f.debug_struct("BinaryView");
266 if self.is_inlined() {
267 s.field("inline", &self.as_inlined());
268 } else {
269 s.field("ref", &self.as_view());
270 }
271 s.finish()
272 }
273}
274
275pub(super) fn validate_views<ValidateFn, IsValidFn>(
281 views: &[BinaryView],
282 buffers: impl AsRef<[ByteBuffer]>,
283 validity: IsValidFn,
284 validator: ValidateFn,
285) -> VortexResult<()>
286where
287 IsValidFn: Fn(usize) -> bool,
288 ValidateFn: Fn(&[u8]) -> bool,
289{
290 let buffers = buffers.as_ref();
291 for (idx, &view) in views.iter().enumerate() {
292 if !validity(idx) {
293 continue;
294 }
295
296 if view.is_inlined() {
297 let bytes = &unsafe { view.inlined }.data[..view.len() as usize];
299 vortex_ensure!(
300 validator(bytes),
301 "view at index {idx}: inlined bytes failed utf-8 validation"
302 );
303 } else {
304 let view = view.as_view();
306 let buf_index = view.buffer_index as usize;
307 let start_offset = view.offset as usize;
308 let end_offset = start_offset.saturating_add(view.size as usize);
309
310 let buf = buffers.get(buf_index).ok_or_else(||
311 vortex_err!("view at index {idx} references invalid buffer: {buf_index} out of bounds for BinaryViewVector with {} buffers",
312 buffers.len()))?;
313
314 vortex_ensure!(
315 start_offset < buf.len(),
316 "start offset {start_offset} out of bounds for buffer {buf_index} with size {}",
317 buf.len(),
318 );
319
320 vortex_ensure!(
321 end_offset <= buf.len(),
322 "end offset {end_offset} out of bounds for buffer {buf_index} with size {}",
323 buf.len(),
324 );
325
326 let bytes = &buf[start_offset..end_offset];
328 vortex_ensure!(
329 view.prefix == bytes[..4],
330 "VarBinView prefix does not match full string"
331 );
332
333 vortex_ensure!(
335 validator(bytes),
336 "view at index {idx}: outlined bytes failed utf-8 validation"
337 );
338 }
339 }
340
341 Ok(())
342}