vortex_vector/binaryview/
view.rs1use std::fmt;
7use std::hash::Hash;
8use std::hash::Hasher;
9use std::ops::Range;
10
11use static_assertions::assert_eq_align;
12use static_assertions::assert_eq_size;
13use vortex_buffer::ByteBuffer;
14use vortex_error::VortexExpect;
15use vortex_error::VortexResult;
16use vortex_error::vortex_ensure;
17use vortex_error::vortex_err;
18
19#[derive(Clone, Copy)]
24#[repr(C, align(16))]
25pub union BinaryView {
26 pub(crate) le_bytes: [u8; 16],
29
30 pub(crate) inlined: Inlined,
32
33 pub(crate) _ref: Ref,
35}
36
37assert_eq_align!(BinaryView, u128);
38assert_eq_size!(BinaryView, [u8; 16]);
39assert_eq_size!(Inlined, [u8; 16]);
40assert_eq_size!(Ref, [u8; 16]);
41
42#[derive(Clone, Copy, Debug, PartialEq, Eq)]
44#[repr(C, align(8))]
45pub struct Inlined {
46 pub size: u32,
48 pub data: [u8; BinaryView::MAX_INLINED_SIZE],
50}
51
52impl Inlined {
53 #[inline]
55 fn new<const N: usize>(value: &[u8]) -> Self {
56 debug_assert_eq!(value.len(), N);
57 let mut inlined = Self {
58 size: N.try_into().vortex_expect("inlined size must fit in u32"),
59 data: [0u8; BinaryView::MAX_INLINED_SIZE],
60 };
61 inlined.data[..N].copy_from_slice(&value[..N]);
62 inlined
63 }
64
65 #[inline]
67 pub fn value(&self) -> &[u8] {
68 &self.data[0..(self.size as usize)]
69 }
70}
71
/// Reference form of a [`BinaryView`]: the value's bytes live in a separate buffer.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(8))]
pub struct Ref {
    /// Length of the referenced value in bytes.
    pub size: u32,
    /// First four bytes of the referenced value.
    pub prefix: [u8; 4],
    /// Index of the buffer that holds the value.
    pub buffer_index: u32,
    /// Byte offset of the value within that buffer.
    pub offset: u32,
}

impl Ref {
    /// Returns the byte range `offset..offset + size` that this reference covers in its buffer.
    ///
    /// The bounds are widened to `usize` before being added, so an end position above
    /// `u32::MAX` is represented correctly instead of overflowing `u32`. The addition
    /// saturates at `usize::MAX`, which can only matter on targets where `usize` is
    /// narrower than 64 bits.
    #[inline]
    pub fn as_range(&self) -> Range<usize> {
        let start = self.offset as usize;
        let end = start.saturating_add(self.size as usize);
        start..end
    }

    /// Returns a copy of this reference that keeps `size` and `prefix` but points at
    /// `buffer_index` / `offset` instead.
    #[inline]
    pub fn with_buffer_and_offset(&self, buffer_index: u32, offset: u32) -> Ref {
        Self {
            size: self.size,
            prefix: self.prefix,
            buffer_index,
            offset,
        }
    }
}
104
105impl PartialEq for BinaryView {
106 fn eq(&self, other: &Self) -> bool {
107 let a = unsafe { std::mem::transmute::<&BinaryView, &u128>(self) };
108 let b = unsafe { std::mem::transmute::<&BinaryView, &u128>(other) };
109 a == b
110 }
111}
112impl Eq for BinaryView {}
113
114impl Hash for BinaryView {
115 fn hash<H: Hasher>(&self, state: &mut H) {
116 unsafe { std::mem::transmute::<&BinaryView, &u128>(self) }.hash(state);
117 }
118}
119
120impl Default for BinaryView {
121 fn default() -> Self {
122 Self::make_view(&[], 0, 0)
123 }
124}
125
126impl BinaryView {
127 pub const MAX_INLINED_SIZE: usize = 12;
129
130 #[inline(never)]
138 pub fn make_view(value: &[u8], block: u32, offset: u32) -> Self {
139 match value.len() {
140 0 => Self {
141 inlined: Inlined::new::<0>(value),
142 },
143 1 => Self {
144 inlined: Inlined::new::<1>(value),
145 },
146 2 => Self {
147 inlined: Inlined::new::<2>(value),
148 },
149 3 => Self {
150 inlined: Inlined::new::<3>(value),
151 },
152 4 => Self {
153 inlined: Inlined::new::<4>(value),
154 },
155 5 => Self {
156 inlined: Inlined::new::<5>(value),
157 },
158 6 => Self {
159 inlined: Inlined::new::<6>(value),
160 },
161 7 => Self {
162 inlined: Inlined::new::<7>(value),
163 },
164 8 => Self {
165 inlined: Inlined::new::<8>(value),
166 },
167 9 => Self {
168 inlined: Inlined::new::<9>(value),
169 },
170 10 => Self {
171 inlined: Inlined::new::<10>(value),
172 },
173 11 => Self {
174 inlined: Inlined::new::<11>(value),
175 },
176 12 => Self {
177 inlined: Inlined::new::<12>(value),
178 },
179 _ => Self {
180 _ref: Ref {
181 size: u32::try_from(value.len()).vortex_expect("value length must fit in u32"),
182 prefix: value[0..4]
183 .try_into()
184 .vortex_expect("prefix must be exactly 4 bytes"),
185 buffer_index: block,
186 offset,
187 },
188 },
189 }
190 }
191
192 #[inline]
194 pub fn empty_view() -> Self {
195 Self { le_bytes: [0; 16] }
196 }
197
198 #[inline]
204 pub fn new_inlined(value: &[u8]) -> Self {
205 assert!(
206 value.len() <= Self::MAX_INLINED_SIZE,
207 "expected inlined value to be <= 12 bytes, was {}",
208 value.len()
209 );
210
211 Self::make_view(value, 0, 0)
212 }
213
214 #[inline]
216 pub fn len(&self) -> u32 {
217 unsafe { self.inlined.size }
218 }
219
220 #[inline]
222 pub fn is_empty(&self) -> bool {
223 self.len() == 0
224 }
225
226 #[inline]
228 #[expect(
229 clippy::cast_possible_truncation,
230 reason = "MAX_INLINED_SIZE is a small constant"
231 )]
232 pub fn is_inlined(&self) -> bool {
233 self.len() <= (Self::MAX_INLINED_SIZE as u32)
234 }
235
236 pub fn as_inlined(&self) -> &Inlined {
238 debug_assert!(self.is_inlined());
239 unsafe { &self.inlined }
240 }
241
242 pub fn as_view(&self) -> &Ref {
244 debug_assert!(!self.is_inlined());
245 unsafe { &self._ref }
246 }
247
248 pub fn as_view_mut(&mut self) -> &mut Ref {
250 unsafe { &mut self._ref }
251 }
252
253 pub fn as_u128(&self) -> u128 {
255 unsafe { u128::from_le_bytes(self.le_bytes) }
257 }
258}
259
260impl From<u128> for BinaryView {
261 fn from(value: u128) -> Self {
262 BinaryView {
263 le_bytes: value.to_le_bytes(),
264 }
265 }
266}
267
268impl From<Ref> for BinaryView {
269 fn from(value: Ref) -> Self {
270 BinaryView { _ref: value }
271 }
272}
273
274impl fmt::Debug for BinaryView {
275 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
276 let mut s = f.debug_struct("BinaryView");
277 if self.is_inlined() {
278 s.field("inline", &self.as_inlined());
279 } else {
280 s.field("ref", &self.as_view());
281 }
282 s.finish()
283 }
284}
285
286pub(super) fn validate_views<ValidateFn, IsValidFn>(
292 views: &[BinaryView],
293 buffers: impl AsRef<[ByteBuffer]>,
294 validity: IsValidFn,
295 validator: ValidateFn,
296) -> VortexResult<()>
297where
298 IsValidFn: Fn(usize) -> bool,
299 ValidateFn: Fn(&[u8]) -> bool,
300{
301 let buffers = buffers.as_ref();
302 for (idx, &view) in views.iter().enumerate() {
303 if !validity(idx) {
304 continue;
305 }
306
307 if view.is_inlined() {
308 let bytes = &unsafe { view.inlined }.data[..view.len() as usize];
310 vortex_ensure!(
311 validator(bytes),
312 "view at index {idx}: inlined bytes failed utf-8 validation"
313 );
314 } else {
315 let view = view.as_view();
317 let buf_index = view.buffer_index as usize;
318 let start_offset = view.offset as usize;
319 let end_offset = start_offset.saturating_add(view.size as usize);
320
321 let buf = buffers.get(buf_index).ok_or_else(||
322 vortex_err!("view at index {idx} references invalid buffer: {buf_index} out of bounds for BinaryViewVector with {} buffers",
323 buffers.len()))?;
324
325 vortex_ensure!(
326 start_offset < buf.len(),
327 "start offset {start_offset} out of bounds for buffer {buf_index} with size {}",
328 buf.len(),
329 );
330
331 vortex_ensure!(
332 end_offset <= buf.len(),
333 "end offset {end_offset} out of bounds for buffer {buf_index} with size {}",
334 buf.len(),
335 );
336
337 let bytes = &buf[start_offset..end_offset];
339 vortex_ensure!(
340 view.prefix == bytes[..4],
341 "VarBinView prefix does not match full string"
342 );
343
344 vortex_ensure!(
346 validator(bytes),
347 "view at index {idx}: outlined bytes failed utf-8 validation"
348 );
349 }
350 }
351
352 Ok(())
353}