vortex_array/arrays/varbinview/
mod.rs1use std::fmt::{Debug, Formatter};
5use std::ops::Range;
6
7use static_assertions::{assert_eq_align, assert_eq_size};
8use vortex_buffer::{Alignment, Buffer, ByteBuffer};
9use vortex_dtype::{DType, Nullability};
10use vortex_error::{VortexResult, VortexUnwrap, vortex_bail, vortex_panic};
11
12use crate::builders::{ArrayBuilder, VarBinViewBuilder};
13use crate::stats::{ArrayStats, StatsSetRef};
14use crate::validity::Validity;
15use crate::vtable::{
16 ArrayVTable, CanonicalVTable, NotSupported, VTable, ValidityHelper,
17 ValidityVTableFromValidityHelper,
18};
19use crate::{Canonical, EncodingId, EncodingRef, vtable};
20
21mod accessor;
22mod compact;
23mod compute;
24mod ops;
25mod serde;
26
27pub use compact::*;
28
29#[derive(Clone, Copy, Debug, PartialEq, Eq)]
30#[repr(C, align(8))]
31pub struct Inlined {
32 size: u32,
33 data: [u8; BinaryView::MAX_INLINED_SIZE],
34}
35
36impl Inlined {
37 fn new<const N: usize>(value: &[u8]) -> Self {
38 let mut inlined = Self {
39 size: N.try_into().vortex_unwrap(),
40 data: [0u8; BinaryView::MAX_INLINED_SIZE],
41 };
42 inlined.data[..N].copy_from_slice(&value[..N]);
43 inlined
44 }
45
46 #[inline]
47 pub fn value(&self) -> &[u8] {
48 &self.data[0..(self.size as usize)]
49 }
50}
51
/// View payload for a value that lives out-of-line in one of the array's data buffers.
///
/// `BinaryView::make_view` fills this in for values longer than the inline
/// capacity: the value length, its first four bytes, the index of the buffer
/// holding it, and the byte offset of the value within that buffer.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(8))]
pub struct Ref {
    size: u32,
    prefix: [u8; 4],
    buffer_index: u32,
    offset: u32,
}

impl Ref {
    /// Creates a reference payload from its four components.
    pub fn new(size: u32, prefix: [u8; 4], buffer_index: u32, offset: u32) -> Self {
        Self {
            size,
            prefix,
            buffer_index,
            offset,
        }
    }

    /// Index of the data buffer that holds the value.
    #[inline]
    pub fn buffer_index(&self) -> u32 {
        self.buffer_index
    }

    /// Byte offset of the value within its data buffer.
    #[inline]
    pub fn offset(&self) -> u32 {
        self.offset
    }

    /// The four prefix bytes stored alongside the reference.
    #[inline]
    pub fn prefix(&self) -> &[u8; 4] {
        &self.prefix
    }

    /// Byte range `offset..offset + size` of the value within its data buffer.
    #[inline]
    pub fn to_range(&self) -> Range<usize> {
        // Widen before adding: `offset + size` can exceed `u32::MAX`, which would
        // panic in debug builds and silently wrap in release builds.
        let start = self.offset as usize;
        start..start + self.size as usize
    }
}
91
/// A 16-byte view describing one value of a `VarBinViewArray`.
///
/// The three fields are alternative interpretations of the same 16 bytes.
/// `Inlined` and `Ref` are both `repr(C)` and start with a `u32` size, which is
/// why `len` can read the length through `inlined.size` for either variant.
#[derive(Clone, Copy)]
#[repr(C, align(16))]
pub union BinaryView {
    // Raw bytes of the view; `as_u128` and `From<u128>` treat them as a
    // little-endian `u128`.
    le_bytes: [u8; 16],

    // Interpretation for values of at most `MAX_INLINED_SIZE` bytes: the length
    // followed by the value bytes themselves.
    inlined: Inlined,

    // Interpretation for longer values: length, 4-byte prefix, buffer index and
    // offset of the out-of-line data.
    _ref: Ref,
}
105
// Compile-time layout checks: the union and both of its struct interpretations
// are exactly 16 bytes, and the union has the same alignment as `u128`.
assert_eq_size!(BinaryView, [u8; 16]);
assert_eq_size!(Inlined, [u8; 16]);
assert_eq_size!(Ref, [u8; 16]);
assert_eq_align!(BinaryView, u128);
110
111impl BinaryView {
112 pub const MAX_INLINED_SIZE: usize = 12;
113
114 #[inline(never)]
122 pub fn make_view(value: &[u8], block: u32, offset: u32) -> Self {
123 match value.len() {
124 0 => Self {
125 inlined: Inlined::new::<0>(value),
126 },
127 1 => Self {
128 inlined: Inlined::new::<1>(value),
129 },
130 2 => Self {
131 inlined: Inlined::new::<2>(value),
132 },
133 3 => Self {
134 inlined: Inlined::new::<3>(value),
135 },
136 4 => Self {
137 inlined: Inlined::new::<4>(value),
138 },
139 5 => Self {
140 inlined: Inlined::new::<5>(value),
141 },
142 6 => Self {
143 inlined: Inlined::new::<6>(value),
144 },
145 7 => Self {
146 inlined: Inlined::new::<7>(value),
147 },
148 8 => Self {
149 inlined: Inlined::new::<8>(value),
150 },
151 9 => Self {
152 inlined: Inlined::new::<9>(value),
153 },
154 10 => Self {
155 inlined: Inlined::new::<10>(value),
156 },
157 11 => Self {
158 inlined: Inlined::new::<11>(value),
159 },
160 12 => Self {
161 inlined: Inlined::new::<12>(value),
162 },
163 _ => Self {
164 _ref: Ref::new(
165 u32::try_from(value.len()).vortex_unwrap(),
166 value[0..4].try_into().vortex_unwrap(),
167 block,
168 offset,
169 ),
170 },
171 }
172 }
173
174 #[inline]
176 pub fn empty_view() -> Self {
177 Self::new_inlined(&[])
178 }
179
180 #[inline]
182 pub fn new_inlined(value: &[u8]) -> Self {
183 assert!(
184 value.len() <= Self::MAX_INLINED_SIZE,
185 "expected inlined value to be <= 12 bytes, was {}",
186 value.len()
187 );
188
189 Self::make_view(value, 0, 0)
190 }
191
192 #[inline]
193 pub fn len(&self) -> u32 {
194 unsafe { self.inlined.size }
195 }
196
197 #[inline]
198 pub fn is_empty(&self) -> bool {
199 self.len() > 0
200 }
201
202 #[inline]
203 #[allow(clippy::cast_possible_truncation)]
204 pub fn is_inlined(&self) -> bool {
205 self.len() <= (Self::MAX_INLINED_SIZE as u32)
206 }
207
208 pub fn as_inlined(&self) -> &Inlined {
209 unsafe { &self.inlined }
210 }
211
212 pub fn as_view(&self) -> &Ref {
213 unsafe { &self._ref }
214 }
215
216 pub fn as_u128(&self) -> u128 {
217 unsafe { u128::from_le_bytes(self.le_bytes) }
219 }
220
221 #[inline(always)]
224 pub fn offset_view(self, offset: u32) -> Self {
225 if self.is_inlined() {
226 self
227 } else {
228 let view_ref = self.as_view();
230 Self {
231 _ref: Ref::new(
232 self.len(),
233 *view_ref.prefix(),
234 offset + view_ref.buffer_index(),
235 view_ref.offset(),
236 ),
237 }
238 }
239 }
240}
241
242impl From<u128> for BinaryView {
243 fn from(value: u128) -> Self {
244 BinaryView {
245 le_bytes: value.to_le_bytes(),
246 }
247 }
248}
249
250impl Debug for BinaryView {
251 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
252 let mut s = f.debug_struct("BinaryView");
253 if self.is_inlined() {
254 s.field("inline", &"i".to_string());
255 } else {
256 s.field("ref", &"r".to_string());
257 }
258 s.finish()
259 }
260}
261
// Invokes the crate's `vtable!` macro for the `VarBinView` encoding.
// NOTE(review): presumably this is what declares `VarBinViewVTable`, which is
// used below but not defined in the visible part of this file — confirm against
// the macro definition.
vtable!(VarBinView);
263
/// Wires the VarBinView encoding into the vtable machinery.
impl VTable for VarBinViewVTable {
    type Array = VarBinViewArray;
    type Encoding = VarBinViewEncoding;

    // The `Self` entries are provided by impls on this vtable type; the
    // `ArrayVTable` and `CanonicalVTable` impls are in this file.
    // NOTE(review): the operations, visitor and serde impls presumably live in
    // the submodules declared at the top of the file — confirm.
    type ArrayVTable = Self;
    type CanonicalVTable = Self;
    type OperationsVTable = Self;
    // Validity is answered through the `ValidityHelper` impl on `VarBinViewArray`.
    type ValidityVTable = ValidityVTableFromValidityHelper;
    type VisitorVTable = Self;
    // Compute and encode are declared as not supported for this encoding.
    type ComputeVTable = NotSupported;
    type EncodeVTable = NotSupported;
    type SerdeVTable = Self;

    /// Returns the encoding identifier `"vortex.varbinview"`.
    fn id(_encoding: &Self::Encoding) -> EncodingId {
        EncodingId::new_ref("vortex.varbinview")
    }

    /// Returns a reference to the `VarBinViewEncoding` for any array of this type.
    fn encoding(_array: &Self::Array) -> EncodingRef {
        EncodingRef::new_ref(VarBinViewEncoding.as_ref())
    }
}
285
/// Array of variable-length binary or UTF-8 values, each described by a 16-byte
/// `BinaryView`.
#[derive(Clone, Debug)]
pub struct VarBinViewArray {
    // Logical type; `try_new` only accepts `DType::Binary` or `DType::Utf8`.
    dtype: DType,
    // Data buffers that non-inlined views point into via their buffer index.
    buffers: Vec<ByteBuffer>,
    // One view per element; the array length is `views.len()`.
    views: Buffer<BinaryView>,
    // Validity of the elements; `try_new` checks it against the dtype's nullability.
    validity: Validity,
    // Statistics for this array, starting from `Default::default()` in `try_new`.
    stats_set: ArrayStats,
}
294
/// Unit marker type for the VarBinView encoding; it is the `Encoding` associated
/// type of `VarBinViewVTable`.
#[derive(Clone, Debug)]
pub struct VarBinViewEncoding;
297
298impl VarBinViewArray {
299 pub fn try_new(
300 views: Buffer<BinaryView>,
301 buffers: Vec<ByteBuffer>,
302 dtype: DType,
303 validity: Validity,
304 ) -> VortexResult<Self> {
305 if views.alignment() != Alignment::of::<BinaryView>() {
306 vortex_bail!("Views must be aligned to a 128 bits");
307 }
308
309 if !matches!(dtype, DType::Binary(_) | DType::Utf8(_)) {
310 vortex_bail!(MismatchedTypes: "utf8 or binary", dtype);
311 }
312
313 if dtype.is_nullable() == (validity == Validity::NonNullable) {
314 vortex_bail!("incorrect validity {:?}", validity);
315 }
316
317 Ok(Self {
318 dtype,
319 buffers,
320 views,
321 validity,
322 stats_set: Default::default(),
323 })
324 }
325
326 pub fn nbuffers(&self) -> usize {
328 self.buffers.len()
329 }
330
331 #[inline]
337 pub fn views(&self) -> &Buffer<BinaryView> {
338 &self.views
339 }
340
341 #[inline]
345 pub fn bytes_at(&self, index: usize) -> ByteBuffer {
346 let views = self.views();
347 let view = &views[index];
348 if !view.is_inlined() {
350 let view_ref = view.as_view();
351 self.buffer(view_ref.buffer_index() as usize)
352 .slice(view_ref.to_range())
353 } else {
354 views
356 .clone()
357 .into_byte_buffer()
358 .slice_ref(view.as_inlined().value())
359 }
360 }
361
362 #[inline]
369 pub fn buffer(&self, idx: usize) -> &ByteBuffer {
370 if idx >= self.nbuffers() {
371 vortex_panic!(
372 "{idx} buffer index out of bounds, there are {} buffers",
373 self.nbuffers()
374 );
375 }
376 &self.buffers[idx]
377 }
378
379 #[inline]
381 pub fn buffers(&self) -> &[ByteBuffer] {
382 &self.buffers
383 }
384
385 #[allow(clippy::same_name_method)]
387 pub fn from_iter<T: AsRef<[u8]>, I: IntoIterator<Item = Option<T>>>(
388 iter: I,
389 dtype: DType,
390 ) -> Self {
391 let iter = iter.into_iter();
392 let mut builder = VarBinViewBuilder::with_capacity(dtype, iter.size_hint().0);
393
394 for item in iter {
395 match item {
396 None => builder.append_null(),
397 Some(v) => builder.append_value(v),
398 }
399 }
400
401 builder.finish_into_varbinview()
402 }
403
404 pub fn from_iter_str<T: AsRef<str>, I: IntoIterator<Item = T>>(iter: I) -> Self {
405 let iter = iter.into_iter();
406 let mut builder = VarBinViewBuilder::with_capacity(
407 DType::Utf8(Nullability::NonNullable),
408 iter.size_hint().0,
409 );
410
411 for item in iter {
412 builder.append_value(item.as_ref());
413 }
414
415 builder.finish_into_varbinview()
416 }
417
418 pub fn from_iter_nullable_str<T: AsRef<str>, I: IntoIterator<Item = Option<T>>>(
419 iter: I,
420 ) -> Self {
421 let iter = iter.into_iter();
422 let mut builder = VarBinViewBuilder::with_capacity(
423 DType::Utf8(Nullability::Nullable),
424 iter.size_hint().0,
425 );
426
427 for item in iter {
428 match item {
429 None => builder.append_null(),
430 Some(v) => builder.append_value(v.as_ref()),
431 }
432 }
433
434 builder.finish_into_varbinview()
435 }
436
437 pub fn from_iter_bin<T: AsRef<[u8]>, I: IntoIterator<Item = T>>(iter: I) -> Self {
438 let iter = iter.into_iter();
439 let mut builder = VarBinViewBuilder::with_capacity(
440 DType::Binary(Nullability::NonNullable),
441 iter.size_hint().0,
442 );
443
444 for item in iter {
445 builder.append_value(item.as_ref());
446 }
447
448 builder.finish_into_varbinview()
449 }
450
451 pub fn from_iter_nullable_bin<T: AsRef<[u8]>, I: IntoIterator<Item = Option<T>>>(
452 iter: I,
453 ) -> Self {
454 let iter = iter.into_iter();
455 let mut builder = VarBinViewBuilder::with_capacity(
456 DType::Binary(Nullability::Nullable),
457 iter.size_hint().0,
458 );
459
460 for item in iter {
461 match item {
462 None => builder.append_null(),
463 Some(v) => builder.append_value(v.as_ref()),
464 }
465 }
466
467 builder.finish_into_varbinview()
468 }
469}
470
/// Basic array accessors for the VarBinView encoding.
impl ArrayVTable<VarBinViewVTable> for VarBinViewVTable {
    /// Number of elements, which equals the number of views.
    fn len(array: &VarBinViewArray) -> usize {
        array.views.len()
    }

    /// The array's logical type.
    fn dtype(array: &VarBinViewArray) -> &DType {
        &array.dtype
    }

    /// A reference to the array's statistics, bound to the array itself.
    fn stats(array: &VarBinViewArray) -> StatsSetRef<'_> {
        array.stats_set.to_ref(array.as_ref())
    }
}
484
/// Exposes the stored validity; `VarBinViewVTable` names
/// `ValidityVTableFromValidityHelper` as its validity vtable.
impl ValidityHelper for VarBinViewArray {
    /// Returns the array's validity.
    fn validity(&self) -> &Validity {
        &self.validity
    }
}
490
/// Canonicalization for the VarBinView encoding.
impl CanonicalVTable<VarBinViewVTable> for VarBinViewVTable {
    /// Returns a clone of the array wrapped in `Canonical::VarBinView`; no
    /// conversion is performed.
    fn canonicalize(array: &VarBinViewArray) -> VortexResult<Canonical> {
        Ok(Canonical::VarBinView(array.clone()))
    }

    /// Appends the array to `builder` by delegating to `extend_from_array`.
    fn append_to_builder(
        array: &VarBinViewArray,
        builder: &mut dyn ArrayBuilder,
    ) -> VortexResult<()> {
        builder.extend_from_array(array.as_ref())
    }
}
503
/// Collects optional byte slices into a nullable binary array.
impl<'a> FromIterator<Option<&'a [u8]>> for VarBinViewArray {
    // Delegates to `from_iter_nullable_bin`.
    fn from_iter<T: IntoIterator<Item = Option<&'a [u8]>>>(iter: T) -> Self {
        Self::from_iter_nullable_bin(iter)
    }
}
509
/// Collects optional owned byte vectors into a nullable binary array.
impl FromIterator<Option<Vec<u8>>> for VarBinViewArray {
    // Delegates to `from_iter_nullable_bin`.
    fn from_iter<T: IntoIterator<Item = Option<Vec<u8>>>>(iter: T) -> Self {
        Self::from_iter_nullable_bin(iter)
    }
}
515
/// Collects optional owned strings into a nullable UTF-8 array.
impl FromIterator<Option<String>> for VarBinViewArray {
    // Delegates to `from_iter_nullable_str`.
    fn from_iter<T: IntoIterator<Item = Option<String>>>(iter: T) -> Self {
        Self::from_iter_nullable_str(iter)
    }
}
521
/// Collects optional string slices into a nullable UTF-8 array.
impl<'a> FromIterator<Option<&'a str>> for VarBinViewArray {
    // Delegates to `from_iter_nullable_str`.
    fn from_iter<T: IntoIterator<Item = Option<&'a str>>>(iter: T) -> Self {
        Self::from_iter_nullable_str(iter)
    }
}
527
#[cfg(test)]
mod test {
    use vortex_scalar::Scalar;

    use crate::arrays::varbinview::{BinaryView, VarBinViewArray};
    use crate::{Array, Canonical, IntoArray};

    /// An 11-byte value (inlinable) and a longer value both read back via `scalar_at`.
    #[test]
    pub fn varbin_view() {
        let expected = ["hello world", "hello world this is a long string"];
        let binary_arr = VarBinViewArray::from_iter_str(expected);

        assert_eq!(binary_arr.len(), 2);
        for (idx, value) in expected.into_iter().enumerate() {
            assert_eq!(binary_arr.scalar_at(idx).unwrap(), Scalar::from(value));
        }
    }

    /// Slicing off the first element leaves the long value at index 0.
    #[test]
    pub fn slice_array() {
        let long_value = "hello world this is a long string";
        let full = VarBinViewArray::from_iter_str(["hello world", long_value]);
        let sliced = full.slice(1, 2).unwrap();

        assert_eq!(sliced.scalar_at(0).unwrap(), Scalar::from(long_value));
    }

    /// Canonicalizing yields the `VarBinView` variant with the values intact.
    #[test]
    pub fn flatten_array() {
        let values = ["string1", "string2"];
        let flattened = VarBinViewArray::from_iter_str(values)
            .to_canonical()
            .unwrap();
        assert!(matches!(flattened, Canonical::VarBinView(_)));

        let var_bin = flattened.into_varbinview().unwrap().into_array();
        for (idx, value) in values.into_iter().enumerate() {
            assert_eq!(var_bin.scalar_at(idx).unwrap(), Scalar::from(value));
        }
    }

    /// `BinaryView` is 16 bytes in size and 16-byte aligned.
    #[test]
    pub fn binary_view_size_and_alignment() {
        assert_eq!(std::mem::size_of::<BinaryView>(), 16);
        assert_eq!(std::mem::align_of::<BinaryView>(), 16);
    }
}