vortex_array/arrays/varbinview/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::{Debug, Formatter};
5use std::hash::{Hash, Hasher};
6use std::ops::Range;
7use std::sync::Arc;
8
9use static_assertions::{assert_eq_align, assert_eq_size};
10use vortex_buffer::{Buffer, ByteBuffer};
11use vortex_dtype::{DType, Nullability};
12use vortex_error::{
13    VortexExpect, VortexResult, VortexUnwrap, vortex_bail, vortex_ensure, vortex_err, vortex_panic,
14};
15
16use crate::builders::{ArrayBuilder, VarBinViewBuilder};
17use crate::stats::{ArrayStats, StatsSetRef};
18use crate::validity::Validity;
19use crate::vtable::{
20    ArrayVTable, CanonicalVTable, NotSupported, VTable, ValidityHelper,
21    ValidityVTableFromValidityHelper,
22};
23use crate::{Canonical, EncodingId, EncodingRef, vtable};
24
25mod accessor;
26mod compact;
27mod compute;
28mod ops;
29mod serde;
30
/// The inlined form of a `BinaryView`: a `u32` length followed by up to
/// `BinaryView::MAX_INLINED_SIZE` bytes of value data stored directly inside the view.
///
/// `repr(C)` fixes the field order, so `size` occupies the first four bytes.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(C, align(8))]
pub struct Inlined {
    /// Length of the stored value in bytes.
    size: u32,
    /// Value bytes; only the first `size` bytes are returned by `value()`.
    data: [u8; BinaryView::MAX_INLINED_SIZE],
}
37
38impl Inlined {
39    fn new<const N: usize>(value: &[u8]) -> Self {
40        let mut inlined = Self {
41            size: N.try_into().vortex_unwrap(),
42            data: [0u8; BinaryView::MAX_INLINED_SIZE],
43        };
44        inlined.data[..N].copy_from_slice(&value[..N]);
45        inlined
46    }
47
48    #[inline]
49    pub fn value(&self) -> &[u8] {
50        &self.data[0..(self.size as usize)]
51    }
52}
53
/// The reference form of a `BinaryView`: describes a value that lives in one of the
/// array's data buffers rather than inside the view itself.
///
/// `repr(C)` fixes the field order, so `size` occupies the first four bytes.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(8))]
pub struct Ref {
    /// Length of the referenced value in bytes.
    size: u32,
    /// Copy of the first four bytes of the referenced value.
    prefix: [u8; 4],
    /// Index of the data buffer that holds the value.
    buffer_index: u32,
    /// Byte offset of the value inside that buffer.
    offset: u32,
}

impl Ref {
    /// Creates a reference view from its four raw components.
    pub fn new(size: u32, prefix: [u8; 4], buffer_index: u32, offset: u32) -> Self {
        Self {
            size,
            prefix,
            buffer_index,
            offset,
        }
    }

    /// Index of the data buffer holding the referenced value.
    #[inline]
    pub fn buffer_index(&self) -> u32 {
        self.buffer_index
    }

    /// Byte offset of the referenced value within its data buffer.
    #[inline]
    pub fn offset(&self) -> u32 {
        self.offset
    }

    /// The stored 4-byte prefix of the referenced value.
    #[inline]
    pub fn prefix(&self) -> &[u8; 4] {
        &self.prefix
    }

    /// Byte range `offset..offset + size` of the referenced value within its data buffer.
    ///
    /// The end is computed in `usize` rather than `u32`: `offset + size` may exceed
    /// `u32::MAX`, and a `u32` addition would panic in debug builds or silently wrap in
    /// release builds. Saturating keeps the range well-formed even where `usize` is 32 bits.
    #[inline]
    pub fn to_range(&self) -> Range<usize> {
        let start = self.offset as usize;
        start..start.saturating_add(self.size as usize)
    }
}
93
/// A 16-byte view over a variable-length binary value.
///
/// The same 16 bytes can be read through any of the three fields below. Both `Inlined` and
/// `Ref` begin with a `u32` length, and that length decides which of the two is meaningful.
#[derive(Clone, Copy)]
#[repr(C, align(16))]
pub union BinaryView {
    // Numeric representation. This is logically a `u128`, stored as its 16 little-endian
    // bytes; the 16-byte alignment comes from the `align(16)` on the union itself.
    le_bytes: [u8; 16],

    // Inlined representation: strings <= 12 bytes
    inlined: Inlined,

    // Reference type: strings > 12 bytes.
    _ref: Ref,
}
107
// Compile-time layout checks: each representation must be exactly 16 bytes wide, and
// `BinaryView` must have the same alignment as `u128`.
assert_eq_size!(BinaryView, [u8; 16]);
assert_eq_size!(Inlined, [u8; 16]);
assert_eq_size!(Ref, [u8; 16]);
assert_eq_align!(BinaryView, u128);
112
113impl Hash for BinaryView {
114    fn hash<H: Hasher>(&self, state: &mut H) {
115        unsafe { std::mem::transmute::<&BinaryView, &[u8; 16]>(self) }.hash(state);
116    }
117}
118
119impl Default for BinaryView {
120    fn default() -> Self {
121        Self::make_view(&[], 0, 0)
122    }
123}
124
125impl BinaryView {
126    pub const MAX_INLINED_SIZE: usize = 12;
127
128    /// Create a view from a value, block and offset
129    ///
130    /// Depending on the length of the provided value either a new inlined
131    /// or a reference view will be constructed.
132    ///
133    /// Adapted from arrow-rs <https://github.com/apache/arrow-rs/blob/f4fde769ab6e1a9b75f890b7f8b47bc22800830b/arrow-array/src/builder/generic_bytes_view_builder.rs#L524>
134    /// Explicitly enumerating inlined view produces code that avoids calling generic `ptr::copy_non_interleave` that's slower than explicit stores
135    #[inline(never)]
136    pub fn make_view(value: &[u8], block: u32, offset: u32) -> Self {
137        match value.len() {
138            0 => Self {
139                inlined: Inlined::new::<0>(value),
140            },
141            1 => Self {
142                inlined: Inlined::new::<1>(value),
143            },
144            2 => Self {
145                inlined: Inlined::new::<2>(value),
146            },
147            3 => Self {
148                inlined: Inlined::new::<3>(value),
149            },
150            4 => Self {
151                inlined: Inlined::new::<4>(value),
152            },
153            5 => Self {
154                inlined: Inlined::new::<5>(value),
155            },
156            6 => Self {
157                inlined: Inlined::new::<6>(value),
158            },
159            7 => Self {
160                inlined: Inlined::new::<7>(value),
161            },
162            8 => Self {
163                inlined: Inlined::new::<8>(value),
164            },
165            9 => Self {
166                inlined: Inlined::new::<9>(value),
167            },
168            10 => Self {
169                inlined: Inlined::new::<10>(value),
170            },
171            11 => Self {
172                inlined: Inlined::new::<11>(value),
173            },
174            12 => Self {
175                inlined: Inlined::new::<12>(value),
176            },
177            _ => Self {
178                _ref: Ref::new(
179                    u32::try_from(value.len()).vortex_unwrap(),
180                    value[0..4].try_into().vortex_unwrap(),
181                    block,
182                    offset,
183                ),
184            },
185        }
186    }
187
188    /// Create a new empty view
189    #[inline]
190    pub fn empty_view() -> Self {
191        Self::new_inlined(&[])
192    }
193
194    /// Create a new inlined binary view
195    #[inline]
196    pub fn new_inlined(value: &[u8]) -> Self {
197        assert!(
198            value.len() <= Self::MAX_INLINED_SIZE,
199            "expected inlined value to be <= 12 bytes, was {}",
200            value.len()
201        );
202
203        Self::make_view(value, 0, 0)
204    }
205
206    #[inline]
207    pub fn len(&self) -> u32 {
208        unsafe { self.inlined.size }
209    }
210
211    #[inline]
212    pub fn is_empty(&self) -> bool {
213        self.len() > 0
214    }
215
216    #[inline]
217    #[allow(clippy::cast_possible_truncation)]
218    pub fn is_inlined(&self) -> bool {
219        self.len() <= (Self::MAX_INLINED_SIZE as u32)
220    }
221
222    pub fn as_inlined(&self) -> &Inlined {
223        unsafe { &self.inlined }
224    }
225
226    pub fn as_view(&self) -> &Ref {
227        unsafe { &self._ref }
228    }
229
230    pub fn as_u128(&self) -> u128 {
231        // SAFETY: binary view always safe to read as u128 LE bytes
232        unsafe { u128::from_le_bytes(self.le_bytes) }
233    }
234
235    /// Override the buffer reference with the given buffer_idx, only if this view is not inlined.
236    #[inline(always)]
237    pub fn with_buffer_idx(self, buffer_idx: u32) -> Self {
238        if self.is_inlined() {
239            self
240        } else {
241            // Referencing views must have their buffer_index adjusted with new offsets
242            let view_ref = self.as_view();
243            Self {
244                _ref: Ref::new(
245                    self.len(),
246                    *view_ref.prefix(),
247                    buffer_idx,
248                    view_ref.offset(),
249                ),
250            }
251        }
252    }
253
254    /// Shifts the buffer reference by the view by a given offset, useful when merging many
255    /// varbinview arrays into one.
256    #[inline(always)]
257    pub fn offset_view(self, offset: u32) -> Self {
258        if self.is_inlined() {
259            self
260        } else {
261            // Referencing views must have their buffer_index adjusted with new offsets
262            let view_ref = self.as_view();
263            Self {
264                _ref: Ref::new(
265                    self.len(),
266                    *view_ref.prefix(),
267                    offset + view_ref.buffer_index(),
268                    view_ref.offset(),
269                ),
270            }
271        }
272    }
273}
274
275impl From<u128> for BinaryView {
276    fn from(value: u128) -> Self {
277        BinaryView {
278            le_bytes: value.to_le_bytes(),
279        }
280    }
281}
282
283impl Debug for BinaryView {
284    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
285        let mut s = f.debug_struct("BinaryView");
286        if self.is_inlined() {
287            s.field("inline", &"i".to_string());
288        } else {
289            s.field("ref", &"r".to_string());
290        }
291        s.finish()
292    }
293}
294
// Invokes the crate's `vtable!` macro for the `VarBinView` encoding.
// NOTE(review): presumably this generates the `VarBinViewVTable` marker type used below and
// the glue between `VarBinViewArray` and the generic array traits — confirm against the
// macro definition.
vtable!(VarBinView);
296
/// Top-level vtable for the VarBinView encoding: names the array and encoding types and
/// selects the implementation of each sub-vtable.
impl VTable for VarBinViewVTable {
    type Array = VarBinViewArray;
    type Encoding = VarBinViewEncoding;

    // `Self` means the corresponding trait is implemented on `VarBinViewVTable` itself;
    // `NotSupported` opts out of that sub-vtable.
    type ArrayVTable = Self;
    type CanonicalVTable = Self;
    type OperationsVTable = Self;
    // Validity is derived from the array's `ValidityHelper` implementation.
    type ValidityVTable = ValidityVTableFromValidityHelper;
    type VisitorVTable = Self;
    type ComputeVTable = NotSupported;
    type EncodeVTable = NotSupported;
    type PipelineVTable = NotSupported;
    type SerdeVTable = Self;

    /// Returns the identifier of this encoding, `"vortex.varbinview"`.
    fn id(_encoding: &Self::Encoding) -> EncodingId {
        EncodingId::new_ref("vortex.varbinview")
    }

    /// Returns a reference to the (stateless) `VarBinViewEncoding`.
    fn encoding(_array: &Self::Array) -> EncodingRef {
        EncodingRef::new_ref(VarBinViewEncoding.as_ref())
    }
}
319
/// A variable-length binary view array that stores strings and binary data efficiently.
///
/// This mirrors the Apache Arrow StringView/BinaryView array encoding and provides
/// an optimized representation for variable-length data with excellent performance
/// characteristics for both short and long strings.
///
/// ## Data Layout
///
/// The array uses a hybrid storage approach with two main components:
/// - **Views buffer**: Array of 16-byte `BinaryView` entries (one per logical element)
/// - **Data buffers**: Shared backing storage for strings longer than 12 bytes
///
/// ## View Structure
///
/// Commonly referred to as "German Strings", each 16-byte view entry contains either:
/// - **Inlined data**: For strings ≤ 12 bytes, the entire string is stored directly in the view
/// - **Reference data**: For strings > 12 bytes, contains:
///   - String length (4 bytes)
///   - First 4 bytes of string as prefix (4 bytes)
///   - Buffer index and offset (8 bytes total)
///
/// The following ASCII graphic is reproduced verbatim from the Arrow documentation:
///
/// ```text
///                         ┌──────┬────────────────────────┐
///                         │length│      string value      │
///    Strings (len <= 12)  │      │    (padded with 0)     │
///                         └──────┴────────────────────────┘
///                          0    31                      127
///
///                         ┌───────┬───────┬───────┬───────┐
///                         │length │prefix │  buf  │offset │
///    Strings (len > 12)   │       │       │ index │       │
///                         └───────┴───────┴───────┴───────┘
///                          0    31       63      95    127
/// ```
///
/// # Examples
///
/// ```
/// use vortex_array::arrays::VarBinViewArray;
/// use vortex_dtype::{DType, Nullability};
/// use vortex_array::IntoArray;
///
/// // Create from an Iterator<Item = &str>
/// let array = VarBinViewArray::from_iter_str([
///         "inlined",
///         "this string is outlined"
/// ]);
///
/// assert_eq!(array.len(), 2);
///
/// // Access individual strings
/// let first = array.bytes_at(0);
/// assert_eq!(first.as_slice(), b"inlined"); // Short string, stored inline in the view
///
/// let second = array.bytes_at(1);
/// assert_eq!(second.as_slice(), b"this string is outlined"); // Long string, stored in a data buffer
/// ```
#[derive(Clone, Debug)]
pub struct VarBinViewArray {
    /// Logical type of the array; validation accepts only `DType::Utf8` and `DType::Binary`.
    dtype: DType,
    /// Data buffers referenced by non-inlined views.
    buffers: Arc<[ByteBuffer]>,
    /// One 16-byte view per logical element.
    views: Buffer<BinaryView>,
    /// Null information for the elements.
    validity: Validity,
    /// Statistics holder, default-initialized at construction.
    stats_set: ArrayStats,
}
387
/// Stateless marker type for the VarBinView encoding (id `"vortex.varbinview"`).
#[derive(Clone, Debug)]
pub struct VarBinViewEncoding;
390
391impl VarBinViewArray {
392    /// Creates a new [`VarBinViewArray`].
393    ///
394    /// # Panics
395    ///
396    /// Panics if the provided components do not satisfy the invariants documented
397    /// in [`VarBinViewArray::new_unchecked`].
398    pub fn new(
399        views: Buffer<BinaryView>,
400        buffers: Arc<[ByteBuffer]>,
401        dtype: DType,
402        validity: Validity,
403    ) -> Self {
404        Self::try_new(views, buffers, dtype, validity)
405            .vortex_expect("VarBinViewArray construction failed")
406    }
407
408    /// Constructs a new `VarBinViewArray`.
409    ///
410    /// See [`VarBinViewArray::new_unchecked`] for more information.
411    ///
412    /// # Errors
413    ///
414    /// Returns an error if the provided components do not satisfy the invariants documented in
415    /// [`VarBinViewArray::new_unchecked`].
416    pub fn try_new(
417        views: Buffer<BinaryView>,
418        buffers: Arc<[ByteBuffer]>,
419        dtype: DType,
420        validity: Validity,
421    ) -> VortexResult<Self> {
422        Self::validate(&views, &buffers, &dtype, &validity)?;
423
424        // SAFETY: validate ensures all invariants are met.
425        Ok(unsafe { Self::new_unchecked(views, buffers, dtype, validity) })
426    }
427
428    /// Creates a new [`VarBinViewArray`] without validation from these components:
429    ///
430    /// * `views` is a buffer of 16-byte view entries (one per logical element).
431    /// * `buffers` contains the backing storage for strings longer than 12 bytes.
432    /// * `dtype` specifies whether this contains UTF-8 strings or binary data.
433    /// * `validity` holds the null values.
434    ///
435    /// # Safety
436    ///
437    /// The caller must ensure all of the following invariants are satisfied:
438    ///
439    /// ## View Requirements
440    ///
441    /// - Views must be properly formatted 16-byte [`BinaryView`] entries.
442    /// - Inlined views (length ≤ 12) must have valid data in the first `length` bytes.
443    /// - Reference views (length > 12) must:
444    ///   - Have a valid buffer index < `buffers.len()`.
445    ///   - Have valid offsets that don't exceed the referenced buffer's bounds.
446    ///   - Have a 4-byte prefix that matches the actual data at the referenced location.
447    ///
448    /// ## Type Requirements
449    ///
450    /// - `dtype` must be either [`DType::Utf8`] or [`DType::Binary`].
451    /// - For [`DType::Utf8`], all string data (both inlined and referenced) must be valid UTF-8.
452    ///
453    /// ## Validity Requirements
454    ///
455    /// - The validity must have the same nullability as the dtype.
456    /// - If validity is an array, its length must match `views.len()`.
457    pub unsafe fn new_unchecked(
458        views: Buffer<BinaryView>,
459        buffers: Arc<[ByteBuffer]>,
460        dtype: DType,
461        validity: Validity,
462    ) -> Self {
463        Self {
464            dtype,
465            buffers,
466            views,
467            validity,
468            stats_set: Default::default(),
469        }
470    }
471
472    /// Validates the components that would be used to create a [`VarBinViewArray`].
473    ///
474    /// This function checks all the invariants required by [`VarBinViewArray::new_unchecked`].
475    pub(crate) fn validate(
476        views: &Buffer<BinaryView>,
477        buffers: &Arc<[ByteBuffer]>,
478        dtype: &DType,
479        validity: &Validity,
480    ) -> VortexResult<()> {
481        vortex_ensure!(
482            validity.nullability() == dtype.nullability(),
483            "validity {:?} incompatible with nullability {:?}",
484            validity,
485            dtype.nullability()
486        );
487
488        match dtype {
489            DType::Utf8(_) => Self::validate_views(views, buffers, validity, |string| {
490                simdutf8::basic::from_utf8(string).is_ok()
491            })?,
492            DType::Binary(_) => Self::validate_views(views, buffers, validity, |_| true)?,
493            _ => vortex_bail!("invalid DType {dtype} for `VarBinViewArray`"),
494        }
495
496        Ok(())
497    }
498
499    fn validate_views<F>(
500        views: &Buffer<BinaryView>,
501        buffers: &Arc<[ByteBuffer]>,
502        validity: &Validity,
503        validator: F,
504    ) -> VortexResult<()>
505    where
506        F: Fn(&[u8]) -> bool,
507    {
508        for (idx, &view) in views.iter().enumerate() {
509            if validity.is_null(idx) {
510                continue;
511            }
512
513            if view.is_inlined() {
514                // Validate the inline bytestring
515                let bytes = &unsafe { view.inlined }.data[..view.len() as usize];
516                vortex_ensure!(
517                    validator(bytes),
518                    "view at index {idx}: inlined bytes failed utf-8 validation"
519                );
520            } else {
521                // Validate the view pointer
522                let view = view.as_view();
523                let buf_index = view.buffer_index as usize;
524                let start_offset = view.offset as usize;
525                let end_offset = start_offset.saturating_add(view.size as usize);
526
527                let buf = buffers.get(buf_index).ok_or_else(||
528                    vortex_err!("view at index {idx} references invalid buffer: {buf_index} out of bounds for VarBinViewArray with {} buffers",
529                        buffers.len()))?;
530
531                vortex_ensure!(
532                    start_offset < buf.len(),
533                    "start offset {start_offset} out of bounds for buffer {buf_index} with size {}",
534                    buf.len(),
535                );
536
537                vortex_ensure!(
538                    end_offset <= buf.len(),
539                    "end offset {end_offset} out of bounds for buffer {buf_index} with size {}",
540                    buf.len(),
541                );
542
543                // Make sure the prefix data matches the buffer data.
544                let bytes = &buf[start_offset..end_offset];
545                vortex_ensure!(
546                    view.prefix == bytes[..4],
547                    "VarBinView prefix does not match full string"
548                );
549
550                // Validate the full string
551                vortex_ensure!(
552                    validator(bytes),
553                    "view at index {idx}: outlined bytes fails utf-8 validation"
554                );
555            }
556        }
557
558        Ok(())
559    }
560
561    /// Number of raw string data buffers held by this array.
562    pub fn nbuffers(&self) -> usize {
563        self.buffers.len()
564    }
565
566    /// Access to the primitive views buffer.
567    ///
568    /// Variable-sized binary view buffer contain a "view" child array, with 16-byte entries that
569    /// contain either a pointer into one of the array's owned `buffer`s OR an inlined copy of
570    /// the string (if the string has 12 bytes or fewer).
571    #[inline]
572    pub fn views(&self) -> &Buffer<BinaryView> {
573        &self.views
574    }
575
576    /// Access value bytes at a given index
577    ///
578    /// Will return a `ByteBuffer` containing the data without performing a copy.
579    #[inline]
580    pub fn bytes_at(&self, index: usize) -> ByteBuffer {
581        let views = self.views();
582        let view = &views[index];
583        // Expect this to be the common case: strings > 12 bytes.
584        if !view.is_inlined() {
585            let view_ref = view.as_view();
586            self.buffer(view_ref.buffer_index() as usize)
587                .slice(view_ref.to_range())
588        } else {
589            // Return access to the range of bytes around it.
590            views
591                .clone()
592                .into_byte_buffer()
593                .slice_ref(view.as_inlined().value())
594        }
595    }
596
597    /// Access one of the backing data buffers.
598    ///
599    /// # Panics
600    ///
601    /// This method panics if the provided index is out of bounds for the set of buffers provided
602    /// at construction time.
603    #[inline]
604    pub fn buffer(&self, idx: usize) -> &ByteBuffer {
605        if idx >= self.nbuffers() {
606            vortex_panic!(
607                "{idx} buffer index out of bounds, there are {} buffers",
608                self.nbuffers()
609            );
610        }
611        &self.buffers[idx]
612    }
613
614    /// Iterate over the underlying raw data buffers, not including the views buffer.
615    #[inline]
616    pub fn buffers(&self) -> &Arc<[ByteBuffer]> {
617        &self.buffers
618    }
619
620    /// Accumulate an iterable set of values into our type here.
621    #[allow(clippy::same_name_method)]
622    pub fn from_iter<T: AsRef<[u8]>, I: IntoIterator<Item = Option<T>>>(
623        iter: I,
624        dtype: DType,
625    ) -> Self {
626        let iter = iter.into_iter();
627        let mut builder = VarBinViewBuilder::with_capacity(dtype, iter.size_hint().0);
628
629        for item in iter {
630            match item {
631                None => builder.append_null(),
632                Some(v) => builder.append_value(v),
633            }
634        }
635
636        builder.finish_into_varbinview()
637    }
638
639    pub fn from_iter_str<T: AsRef<str>, I: IntoIterator<Item = T>>(iter: I) -> Self {
640        let iter = iter.into_iter();
641        let mut builder = VarBinViewBuilder::with_capacity(
642            DType::Utf8(Nullability::NonNullable),
643            iter.size_hint().0,
644        );
645
646        for item in iter {
647            builder.append_value(item.as_ref());
648        }
649
650        builder.finish_into_varbinview()
651    }
652
653    pub fn from_iter_nullable_str<T: AsRef<str>, I: IntoIterator<Item = Option<T>>>(
654        iter: I,
655    ) -> Self {
656        let iter = iter.into_iter();
657        let mut builder = VarBinViewBuilder::with_capacity(
658            DType::Utf8(Nullability::Nullable),
659            iter.size_hint().0,
660        );
661
662        for item in iter {
663            match item {
664                None => builder.append_null(),
665                Some(v) => builder.append_value(v.as_ref()),
666            }
667        }
668
669        builder.finish_into_varbinview()
670    }
671
672    pub fn from_iter_bin<T: AsRef<[u8]>, I: IntoIterator<Item = T>>(iter: I) -> Self {
673        let iter = iter.into_iter();
674        let mut builder = VarBinViewBuilder::with_capacity(
675            DType::Binary(Nullability::NonNullable),
676            iter.size_hint().0,
677        );
678
679        for item in iter {
680            builder.append_value(item.as_ref());
681        }
682
683        builder.finish_into_varbinview()
684    }
685
686    pub fn from_iter_nullable_bin<T: AsRef<[u8]>, I: IntoIterator<Item = Option<T>>>(
687        iter: I,
688    ) -> Self {
689        let iter = iter.into_iter();
690        let mut builder = VarBinViewBuilder::with_capacity(
691            DType::Binary(Nullability::Nullable),
692            iter.size_hint().0,
693        );
694
695        for item in iter {
696            match item {
697                None => builder.append_null(),
698                Some(v) => builder.append_value(v.as_ref()),
699            }
700        }
701
702        builder.finish_into_varbinview()
703    }
704}
705
706impl ArrayVTable<VarBinViewVTable> for VarBinViewVTable {
707    fn len(array: &VarBinViewArray) -> usize {
708        array.views.len()
709    }
710
711    fn dtype(array: &VarBinViewArray) -> &DType {
712        &array.dtype
713    }
714
715    fn stats(array: &VarBinViewArray) -> StatsSetRef<'_> {
716        array.stats_set.to_ref(array.as_ref())
717    }
718}
719
/// Exposes the stored validity; the encoding's validity vtable
/// (`ValidityVTableFromValidityHelper`) is built on top of this accessor.
impl ValidityHelper for VarBinViewArray {
    /// Borrows the validity supplied at construction time.
    fn validity(&self) -> &Validity {
        &self.validity
    }
}
725
impl CanonicalVTable<VarBinViewVTable> for VarBinViewVTable {
    /// Wraps a clone of the array in `Canonical::VarBinView`; no re-encoding takes place here.
    fn canonicalize(array: &VarBinViewArray) -> Canonical {
        Canonical::VarBinView(array.clone())
    }

    /// Appends the array's contents to `builder` by delegating to
    /// `ArrayBuilder::extend_from_array`.
    fn append_to_builder(array: &VarBinViewArray, builder: &mut dyn ArrayBuilder) {
        builder.extend_from_array(array.as_ref())
    }
}
735
/// Collects optional byte slices into a nullable binary array (`None` becomes null).
impl<'a> FromIterator<Option<&'a [u8]>> for VarBinViewArray {
    fn from_iter<T: IntoIterator<Item = Option<&'a [u8]>>>(iter: T) -> Self {
        Self::from_iter_nullable_bin(iter)
    }
}
741
/// Collects optional owned byte vectors into a nullable binary array (`None` becomes null).
impl FromIterator<Option<Vec<u8>>> for VarBinViewArray {
    fn from_iter<T: IntoIterator<Item = Option<Vec<u8>>>>(iter: T) -> Self {
        Self::from_iter_nullable_bin(iter)
    }
}
747
/// Collects optional owned strings into a nullable UTF-8 array (`None` becomes null).
impl FromIterator<Option<String>> for VarBinViewArray {
    fn from_iter<T: IntoIterator<Item = Option<String>>>(iter: T) -> Self {
        Self::from_iter_nullable_str(iter)
    }
}
753
/// Collects optional string slices into a nullable UTF-8 array (`None` becomes null).
impl<'a> FromIterator<Option<&'a str>> for VarBinViewArray {
    fn from_iter<T: IntoIterator<Item = Option<&'a str>>>(iter: T) -> Self {
        Self::from_iter_nullable_str(iter)
    }
}
759
#[cfg(test)]
mod test {
    use vortex_scalar::Scalar;

    use crate::arrays::varbinview::{BinaryView, VarBinViewArray};
    use crate::{Array, ToCanonical};

    /// Round-trips one inlined (11-byte) and one outlined (> 12-byte) string through the array.
    #[test]
    pub fn varbin_view() {
        let binary_arr =
            VarBinViewArray::from_iter_str(["hello world", "hello world this is a long string"]);
        assert_eq!(binary_arr.len(), 2);
        assert_eq!(binary_arr.scalar_at(0), Scalar::from("hello world"));
        assert_eq!(
            binary_arr.scalar_at(1),
            Scalar::from("hello world this is a long string")
        );
    }

    /// Slicing off the first element must still resolve the outlined string correctly.
    #[test]
    pub fn slice_array() {
        let binary_arr =
            VarBinViewArray::from_iter_str(["hello world", "hello world this is a long string"])
                .slice(1..2);
        assert_eq!(
            binary_arr.scalar_at(0),
            Scalar::from("hello world this is a long string")
        );
    }

    /// Converting to the canonical varbinview form preserves the values.
    #[test]
    pub fn flatten_array() {
        let binary_arr = VarBinViewArray::from_iter_str(["string1", "string2"]);
        let var_bin = binary_arr.to_varbinview();
        assert_eq!(var_bin.scalar_at(0), Scalar::from("string1"));
        assert_eq!(var_bin.scalar_at(1), Scalar::from("string2"));
    }

    /// `BinaryView` must be exactly 16 bytes wide and 16-byte aligned.
    #[test]
    pub fn binary_view_size_and_alignment() {
        assert_eq!(size_of::<BinaryView>(), 16);
        assert_eq!(align_of::<BinaryView>(), 16);
    }
}
803}