vortex_vector/binaryview/
vector.rs1use std::fmt::Debug;
7use std::ops::BitAnd;
8use std::ops::RangeBounds;
9use std::sync::Arc;
10
11use vortex_buffer::Alignment;
12use vortex_buffer::Buffer;
13use vortex_buffer::ByteBuffer;
14use vortex_error::VortexExpect;
15use vortex_error::VortexResult;
16use vortex_error::vortex_ensure;
17use vortex_mask::Mask;
18
19use crate::VectorOps;
20use crate::binaryview::BinaryViewScalar;
21use crate::binaryview::BinaryViewType;
22use crate::binaryview::vector_mut::BinaryViewVectorMut;
23use crate::binaryview::view::BinaryView;
24use crate::binaryview::view::validate_views;
25
26#[derive(Debug, Clone)]
30pub struct BinaryViewVector<T: BinaryViewType> {
31 views: Buffer<BinaryView>,
33 buffers: Arc<Box<[ByteBuffer]>>,
35 validity: Mask,
37 _marker: std::marker::PhantomData<T>,
39}
40
41impl<T: BinaryViewType> PartialEq for BinaryViewVector<T> {
42 fn eq(&self, other: &Self) -> bool {
43 if self.views.len() != other.views.len() {
44 return false;
45 }
46 if self.validity != other.validity {
48 return false;
49 }
50 self.views
52 .iter()
53 .zip(other.views.iter())
54 .enumerate()
55 .all(|(i, (self_view, other_view))| {
56 if !self.validity.value(i) {
58 return true;
59 }
60 let self_bytes: &[u8] = if self_view.is_inlined() {
62 self_view.as_inlined().value()
63 } else {
64 let view_ref = self_view.as_view();
65 let buffer = &self.buffers[view_ref.buffer_index as usize];
66 &buffer[view_ref.as_range()]
67 };
68
69 let other_bytes: &[u8] = if other_view.is_inlined() {
70 other_view.as_inlined().value()
71 } else {
72 let view_ref = other_view.as_view();
73 let buffer = &other.buffers[view_ref.buffer_index as usize];
74 &buffer[view_ref.as_range()]
75 };
76
77 self_bytes == other_bytes
78 })
79 }
80}
81
82impl<T: BinaryViewType> Eq for BinaryViewVector<T> {}
83
84impl<T: BinaryViewType> BinaryViewVector<T> {
85 pub unsafe fn new_unchecked(
95 views: Buffer<BinaryView>,
96 buffers: Arc<Box<[ByteBuffer]>>,
97 validity: Mask,
98 ) -> Self {
99 if cfg!(debug_assertions) {
100 Self::new(views, buffers, validity)
101 } else {
102 Self {
103 views,
104 validity,
105 buffers,
106 _marker: std::marker::PhantomData,
107 }
108 }
109 }
110
111 pub fn new(views: Buffer<BinaryView>, buffers: Arc<Box<[ByteBuffer]>>, validity: Mask) -> Self {
118 Self::try_new(views, buffers, validity).vortex_expect("Failed to create `BinaryViewVector`")
119 }
120
121 pub fn try_new(
132 views: Buffer<BinaryView>,
133 buffers: Arc<Box<[ByteBuffer]>>,
134 validity: Mask,
135 ) -> VortexResult<Self> {
136 vortex_ensure!(
137 views.len() == validity.len(),
138 "views buffer length {} != validity length {}",
139 views.len(),
140 validity.len()
141 );
142
143 validate_views(
144 &views,
145 &*buffers,
146 |index| validity.value(index),
147 T::validate,
148 )?;
149
150 Ok(Self {
151 views,
152 buffers,
153 validity,
154 _marker: std::marker::PhantomData,
155 })
156 }
157
158 pub fn into_parts(self) -> (Buffer<BinaryView>, Arc<Box<[ByteBuffer]>>, Mask) {
160 (self.views, self.buffers, self.validity)
161 }
162
163 pub fn get(&self, index: usize) -> Option<T::Scalar> {
167 if !self.validity.value(index) {
168 return None;
169 }
170
171 let view = &self.views[index];
172 if view.is_inlined() {
173 let view = view.as_inlined();
174
175 let buffer = self
177 .views
178 .clone()
179 .into_byte_buffer()
180 .aligned(Alignment::none())
181 .slice_ref(&view.data[..view.size as usize]);
182
183 Some(unsafe { T::scalar_from_buffer_unchecked(buffer) })
187 } else {
188 let view_ref = view.as_view();
190 let buffer = &self.buffers[view_ref.buffer_index as usize];
191
192 let start = view_ref.offset as usize;
193 let length = view_ref.size as usize;
194 let buffer_slice = buffer.slice(start..start + length);
195
196 Some(unsafe { T::scalar_from_buffer_unchecked(buffer_slice) })
200 }
201 }
202
203 pub fn get_ref(&self, index: usize) -> Option<&T::Slice> {
207 if !self.validity.value(index) {
208 return None;
209 }
210
211 Some(unsafe { T::from_bytes_unchecked(self.get_ref_unchecked(index)) })
212 }
213
214 pub unsafe fn get_ref_unchecked(&self, index: usize) -> &[u8] {
220 let view = &self.views[index];
221 if view.is_inlined() {
222 let view = view.as_inlined();
223 &view.data[..view.size as usize]
227 } else {
228 let view_ref = view.as_view();
230 let buffer = &self.buffers[view_ref.buffer_index as usize];
231
232 let start = view_ref.offset as usize;
233 let length = view_ref.size as usize;
234
235 &buffer.as_bytes()[start..start + length]
239 }
240 }
241
242 pub fn buffers(&self) -> &Arc<Box<[ByteBuffer]>> {
244 &self.buffers
245 }
246
247 pub fn views(&self) -> &Buffer<BinaryView> {
249 &self.views
250 }
251}
252
253impl<T: BinaryViewType> VectorOps for BinaryViewVector<T> {
254 type Mutable = BinaryViewVectorMut<T>;
255 type Scalar = BinaryViewScalar<T>;
256
257 fn len(&self) -> usize {
258 self.views.len()
259 }
260
261 fn validity(&self) -> &Mask {
262 &self.validity
263 }
264
265 fn mask_validity(&mut self, mask: &Mask) {
266 self.validity = self.validity.bitand(mask);
267 }
268
269 fn scalar_at(&self, index: usize) -> BinaryViewScalar<T> {
270 assert!(index < self.len());
271 BinaryViewScalar::<T>::new(self.get(index))
272 }
273
274 fn slice(&self, range: impl RangeBounds<usize> + Clone + Debug) -> Self {
275 BinaryViewVector {
276 views: self.views.slice(range.clone()),
277 buffers: self.buffers().clone(),
278 validity: self.validity.slice(range),
279 _marker: self._marker,
280 }
281 }
282
283 fn clear(&mut self) {
284 self.views.clear();
285 self.validity = Mask::new_true(0);
286 self.buffers = Arc::new(Box::new([]));
287 }
288
289 fn try_into_mut(self) -> Result<BinaryViewVectorMut<T>, Self> {
290 let views_mut = match self.views.try_into_mut() {
291 Ok(views_mut) => views_mut,
292 Err(views) => {
293 return Err(Self {
294 views,
295 validity: self.validity,
296 buffers: self.buffers,
297 _marker: std::marker::PhantomData,
298 });
299 }
300 };
301
302 let validity_mut = match self.validity.try_into_mut() {
303 Ok(validity_mut) => validity_mut,
304 Err(validity) => {
305 return Err(Self {
306 views: views_mut.freeze(),
307 validity,
308 buffers: self.buffers,
309 _marker: std::marker::PhantomData,
310 });
311 }
312 };
313
314 let buffers_mut = match Arc::try_unwrap(self.buffers) {
315 Ok(buffers) => buffers.into_vec(),
316 Err(buffers) => {
317 buffers.iter().cloned().collect()
319 }
320 };
321
322 unsafe {
325 Ok(BinaryViewVectorMut::new_unchecked(
326 views_mut,
327 validity_mut,
328 buffers_mut,
329 ))
330 }
331 }
332
333 fn into_mut(self) -> BinaryViewVectorMut<T> {
334 let views_mut = self.views.into_mut();
335 let validity_mut = self.validity.into_mut();
336
337 let buffers_mut = Arc::try_unwrap(self.buffers)
340 .unwrap_or_else(|arc| (*arc).clone())
341 .into_vec();
342
343 unsafe { BinaryViewVectorMut::new_unchecked(views_mut, validity_mut, buffers_mut) }
346 }
347}
348
349#[cfg(test)]
350mod tests {
351 use std::sync::Arc;
352
353 use vortex_buffer::ByteBuffer;
354 use vortex_buffer::buffer;
355 use vortex_mask::Mask;
356
357 use crate::VectorMutOps;
358 use crate::VectorOps;
359 use crate::binaryview::StringVector;
360 use crate::binaryview::StringVectorMut;
361 use crate::binaryview::view::BinaryView;
362
363 #[test]
364 #[should_panic(expected = "views buffer length 1 != validity length 100")]
365 fn test_try_new_mismatch_validity_len() {
366 StringVector::try_new(
367 buffer![BinaryView::new_inlined(b"inlined")],
368 Arc::new(Box::new([])),
369 Mask::new_true(100),
370 )
371 .unwrap();
372 }
373
374 #[test]
375 #[should_panic(
376 expected = "view at index 0 references invalid buffer: 100 out of bounds for BinaryViewVector with 0 buffers"
377 )]
378 fn test_try_new_invalid_buffer_offset() {
379 StringVector::try_new(
380 buffer![BinaryView::make_view(b"bad buffer ptr", 100, 0)],
381 Arc::new(Box::new([])),
382 Mask::new_true(1),
383 )
384 .unwrap();
385 }
386
387 #[test]
388 #[should_panic(expected = "start offset 4294967295 out of bounds for buffer 0 with size 19")]
389 fn test_try_new_invalid_length() {
390 StringVector::try_new(
391 buffer![BinaryView::make_view(b"bad buffer ptr", 0, u32::MAX)],
392 Arc::new(Box::new([ByteBuffer::copy_from(b"a very short buffer")])),
393 Mask::new_true(1),
394 )
395 .unwrap();
396 }
397
398 #[test]
399 #[should_panic(expected = "view at index 0: inlined bytes failed utf-8 validation")]
400 fn test_try_new_invalid_utf8_inlined() {
401 StringVector::try_new(
402 buffer![BinaryView::new_inlined(b"\x80")],
403 Arc::new(Box::new([])),
404 Mask::new_true(1),
405 )
406 .unwrap();
407 }
408
409 #[test]
410 #[should_panic(expected = "view at index 0: outlined bytes failed utf-8 validation")]
411 fn test_try_new_invalid_utf8_outlined() {
412 let sequence = b"\xff".repeat(13);
414 StringVector::try_new(
415 buffer![BinaryView::make_view(&sequence, 0, 0)],
416 Arc::new(Box::new([ByteBuffer::copy_from(sequence)])),
417 Mask::new_true(1),
418 )
419 .unwrap();
420 }
421
422 #[test]
423 fn test_try_into_mut() {
424 let mut shared_vec = StringVectorMut::with_capacity(5);
425 shared_vec.append_nulls(2);
426 shared_vec.append_values("an example value", 2);
427 shared_vec.append_values("another example value", 1);
428
429 let shared_vec = shared_vec.freeze();
430
431 let shared_vec2 = shared_vec.clone();
433
434 let shared_vec = shared_vec.try_into_mut().unwrap_err();
437
438 drop(shared_vec2);
440
441 assert!(shared_vec.try_into_mut().is_ok());
442 }
443
444 #[test]
445 fn test_binaryview_eq_identical_inlined() {
446 let mut v1 = StringVectorMut::with_capacity(3);
448 v1.append_values("hello", 1);
449 v1.append_values("world", 1);
450 v1.append_values("test", 1);
451 let v1 = v1.freeze();
452
453 let mut v2 = StringVectorMut::with_capacity(3);
454 v2.append_values("hello", 1);
455 v2.append_values("world", 1);
456 v2.append_values("test", 1);
457 let v2 = v2.freeze();
458
459 assert_eq!(v1, v2);
460 }
461
462 #[test]
463 fn test_binaryview_eq_identical_outlined() {
464 let mut v1 = StringVectorMut::with_capacity(2);
466 v1.append_values("this is a longer string that won't be inlined", 1);
467 v1.append_values("another long string for testing purposes", 1);
468 let v1 = v1.freeze();
469
470 let mut v2 = StringVectorMut::with_capacity(2);
471 v2.append_values("this is a longer string that won't be inlined", 1);
472 v2.append_values("another long string for testing purposes", 1);
473 let v2 = v2.freeze();
474
475 assert_eq!(v1, v2);
476 }
477
478 #[test]
479 fn test_binaryview_eq_different_length() {
480 let mut v1 = StringVectorMut::with_capacity(3);
481 v1.append_values("a", 1);
482 v1.append_values("b", 1);
483 v1.append_values("c", 1);
484 let v1 = v1.freeze();
485
486 let mut v2 = StringVectorMut::with_capacity(2);
487 v2.append_values("a", 1);
488 v2.append_values("b", 1);
489 let v2 = v2.freeze();
490
491 assert_ne!(v1, v2);
492 }
493
494 #[test]
495 fn test_binaryview_eq_different_validity() {
496 let mut v1 = StringVectorMut::with_capacity(3);
497 v1.append_values("a", 1);
498 v1.append_values("b", 1);
499 v1.append_values("c", 1);
500 let v1 = v1.freeze();
501
502 let mut v2 = StringVectorMut::with_capacity(3);
503 v2.append_values("a", 1);
504 v2.append_nulls(1);
505 v2.append_values("c", 1);
506 let v2 = v2.freeze();
507
508 assert_ne!(v1, v2);
509 }
510
511 #[test]
512 fn test_binaryview_eq_different_values() {
513 let mut v1 = StringVectorMut::with_capacity(3);
514 v1.append_values("hello", 1);
515 v1.append_values("world", 1);
516 v1.append_values("test", 1);
517 let v1 = v1.freeze();
518
519 let mut v2 = StringVectorMut::with_capacity(3);
520 v2.append_values("hello", 1);
521 v2.append_values("DIFFERENT", 1);
522 v2.append_values("test", 1);
523 let v2 = v2.freeze();
524
525 assert_ne!(v1, v2);
526 }
527
528 #[test]
529 fn test_binaryview_eq_ignores_invalid_positions_inlined() {
530 let mut v1 = StringVectorMut::with_capacity(3);
532 v1.append_values("hello", 1);
533 v1.append_values("value_a", 1); v1.append_values("test", 1);
535 let mut v1 = v1.freeze();
536 v1.mask_validity(&Mask::from_iter([true, false, true]));
538
539 let mut v2 = StringVectorMut::with_capacity(3);
540 v2.append_values("hello", 1);
541 v2.append_values("value_b", 1); v2.append_values("test", 1);
543 let mut v2 = v2.freeze();
544 v2.mask_validity(&Mask::from_iter([true, false, true]));
545
546 assert_eq!(v1, v2);
547 }
548
549 #[test]
550 fn test_binaryview_eq_ignores_invalid_positions_outlined() {
551 let mut v1 = StringVectorMut::with_capacity(3);
553 v1.append_values("this is a very long string that will be outlined", 1);
554 v1.append_values("another long value that differs between vectors A", 1);
555 v1.append_values("yet another long string for the test", 1);
556 let mut v1 = v1.freeze();
557 v1.mask_validity(&Mask::from_iter([true, false, true]));
558
559 let mut v2 = StringVectorMut::with_capacity(3);
560 v2.append_values("this is a very long string that will be outlined", 1);
561 v2.append_values("different long value at the invalid position B", 1);
562 v2.append_values("yet another long string for the test", 1);
563 let mut v2 = v2.freeze();
564 v2.mask_validity(&Mask::from_iter([true, false, true]));
565
566 assert_eq!(v1, v2);
567 }
568
569 #[test]
570 fn test_binaryview_eq_empty() {
571 let v1 = StringVectorMut::with_capacity(0).freeze();
572 let v2 = StringVectorMut::with_capacity(0).freeze();
573
574 assert_eq!(v1, v2);
575 }
576
577 #[test]
578 fn test_binaryview_eq_all_nulls() {
579 let mut v1 = StringVectorMut::with_capacity(3);
580 v1.append_nulls(3);
581 let v1 = v1.freeze();
582
583 let mut v2 = StringVectorMut::with_capacity(3);
584 v2.append_nulls(3);
585 let v2 = v2.freeze();
586
587 assert_eq!(v1, v2);
588 }
589}