Skip to main content

polars_arrow/array/binview/
mod.rs

1#![allow(unsafe_op_in_unsafe_fn)]
2//! See thread: https://lists.apache.org/thread/w88tpz76ox8h3rxkjl4so6rg3f1rv7wt
3
4mod builder;
5pub use builder::*;
6mod ffi;
7pub(super) mod fmt;
8mod iterator;
9mod mutable;
10#[cfg(feature = "proptest")]
11pub mod proptest;
12mod view;
13
14use std::any::Any;
15use std::fmt::Debug;
16use std::marker::PhantomData;
17
18use polars_buffer::Buffer;
19use polars_error::*;
20use polars_utils::relaxed_cell::RelaxedCell;
21
22use crate::array::Array;
23use crate::bitmap::Bitmap;
24use crate::datatypes::ArrowDataType;
25
mod private {
    /// Sealing marker: `ViewType` lists it as a supertrait, and because this module is
    /// private, only the types implemented below can satisfy that bound.
    pub trait Sealed: Send + Sync {}

    impl Sealed for [u8] {}
    impl Sealed for str {}
}
32pub use iterator::BinaryViewValueIter;
33pub use mutable::MutableBinaryViewArray;
34use polars_utils::aliases::{InitHashMaps, PlHashMap};
35use private::Sealed;
36
37use crate::array::binview::view::{validate_binary_views, validate_views_utf8_only};
38use crate::array::iterator::NonNullValuesIter;
39use crate::bitmap::utils::{BitmapIter, ZipValidity};
40pub type BinaryViewArray = BinaryViewArrayGeneric<[u8]>;
41pub type Utf8ViewArray = BinaryViewArrayGeneric<str>;
42pub type BinaryViewArrayBuilder = BinaryViewArrayGenericBuilder<[u8]>;
43pub type Utf8ViewArrayBuilder = BinaryViewArrayGenericBuilder<str>;
44pub use view::{View, validate_utf8_views};
45
46use super::Splitable;
47
48pub type MutablePlString = MutableBinaryViewArray<str>;
49pub type MutablePlBinary = MutableBinaryViewArray<[u8]>;
50
51static BIN_VIEW_TYPE: ArrowDataType = ArrowDataType::BinaryView;
52static UTF8_VIEW_TYPE: ArrowDataType = ArrowDataType::Utf8View;
53
54// Growth parameters of view array buffers.
55const DEFAULT_BLOCK_SIZE: usize = 8 * 1024;
56const MAX_EXP_BLOCK_SIZE: usize = 16 * 1024 * 1024;
57
58pub trait ViewType: Sealed + 'static + PartialEq + AsRef<Self> {
59    const IS_UTF8: bool;
60    const DATA_TYPE: ArrowDataType;
61    type Owned: Debug + Clone + Sync + Send + AsRef<Self>;
62
63    /// # Safety
64    /// The caller must ensure that `slice` is a valid view.
65    unsafe fn from_bytes_unchecked(slice: &[u8]) -> &Self;
66    fn from_bytes(slice: &[u8]) -> Option<&Self>;
67
68    fn to_bytes(&self) -> &[u8];
69
70    #[allow(clippy::wrong_self_convention)]
71    fn into_owned(&self) -> Self::Owned;
72
73    fn dtype() -> &'static ArrowDataType;
74}
75
76impl ViewType for str {
77    const IS_UTF8: bool = true;
78    const DATA_TYPE: ArrowDataType = ArrowDataType::Utf8View;
79    type Owned = String;
80
81    #[inline(always)]
82    unsafe fn from_bytes_unchecked(slice: &[u8]) -> &Self {
83        std::str::from_utf8_unchecked(slice)
84    }
85    #[inline(always)]
86    fn from_bytes(slice: &[u8]) -> Option<&Self> {
87        std::str::from_utf8(slice).ok()
88    }
89
90    #[inline(always)]
91    fn to_bytes(&self) -> &[u8] {
92        self.as_bytes()
93    }
94
95    fn into_owned(&self) -> Self::Owned {
96        self.to_string()
97    }
98    fn dtype() -> &'static ArrowDataType {
99        &UTF8_VIEW_TYPE
100    }
101}
102
103impl ViewType for [u8] {
104    const IS_UTF8: bool = false;
105    const DATA_TYPE: ArrowDataType = ArrowDataType::BinaryView;
106    type Owned = Vec<u8>;
107
108    #[inline(always)]
109    unsafe fn from_bytes_unchecked(slice: &[u8]) -> &Self {
110        slice
111    }
112    #[inline(always)]
113    fn from_bytes(slice: &[u8]) -> Option<&Self> {
114        Some(slice)
115    }
116
117    #[inline(always)]
118    fn to_bytes(&self) -> &[u8] {
119        self
120    }
121
122    fn into_owned(&self) -> Self::Owned {
123        self.to_vec()
124    }
125
126    fn dtype() -> &'static ArrowDataType {
127        &BIN_VIEW_TYPE
128    }
129}
130
131pub struct BinaryViewArrayGeneric<T: ViewType + ?Sized> {
132    dtype: ArrowDataType,
133    views: Buffer<View>,
134    buffers: Buffer<Buffer<u8>>,
135    validity: Option<Bitmap>,
136    phantom: PhantomData<T>,
137    /// Total bytes length if we would concatenate them all.
138    total_bytes_len: RelaxedCell<u64>,
139    /// Total bytes in the buffer (excluding remaining capacity)
140    total_buffer_len: usize,
141}
142
143impl<T: ViewType + ?Sized> PartialEq for BinaryViewArrayGeneric<T> {
144    fn eq(&self, other: &Self) -> bool {
145        self.len() == other.len() && self.into_iter().zip(other).all(|(l, r)| l == r)
146    }
147}
148
149impl<T: ViewType + ?Sized> Clone for BinaryViewArrayGeneric<T> {
150    fn clone(&self) -> Self {
151        Self {
152            dtype: self.dtype.clone(),
153            views: self.views.clone(),
154            buffers: self.buffers.clone(),
155            validity: self.validity.clone(),
156            phantom: Default::default(),
157            total_bytes_len: self.total_bytes_len.clone(),
158            total_buffer_len: self.total_buffer_len,
159        }
160    }
161}
162
163unsafe impl<T: ViewType + ?Sized> Send for BinaryViewArrayGeneric<T> {}
164unsafe impl<T: ViewType + ?Sized> Sync for BinaryViewArrayGeneric<T> {}
165
166const UNKNOWN_LEN: u64 = u64::MAX;
167
168impl<T: ViewType + ?Sized> BinaryViewArrayGeneric<T> {
169    /// # Safety
170    /// The caller must ensure
171    /// - the data is valid utf8 (if required)
172    /// - The offsets match the buffers.
173    pub unsafe fn new_unchecked(
174        dtype: ArrowDataType,
175        views: Buffer<View>,
176        buffers: Buffer<Buffer<u8>>,
177        validity: Option<Bitmap>,
178        total_bytes_len: Option<usize>,
179        total_buffer_len: usize,
180    ) -> Self {
181        // Verify the invariants
182        #[cfg(debug_assertions)]
183        {
184            if let Some(validity) = validity.as_ref() {
185                assert_eq!(validity.len(), views.len());
186            }
187
188            // @TODO: Enable this. There are still some bugs but disabled temporarily to get some fixes in.
189            // let mut actual_total_buffer_len = 0;
190            // let mut actual_total_bytes_len = 0;
191
192            // for buffer in buffers.iter() {
193            //     actual_total_buffer_len += buffer.len();
194            // }
195
196            for (i, view) in views.iter().enumerate() {
197                let is_valid = validity.as_ref().is_none_or(|v| v.get_bit(i));
198
199                if !is_valid {
200                    continue;
201                }
202
203                // actual_total_bytes_len += view.length as usize;
204                if view.length > View::MAX_INLINE_SIZE {
205                    assert!((view.buffer_idx as usize) < (buffers.len()));
206                    assert!(
207                        view.offset as usize + view.length as usize
208                            <= buffers[view.buffer_idx as usize].len()
209                    );
210                }
211            }
212
213            // assert_eq!(actual_total_buffer_len, total_buffer_len);
214            // if let Some(len) = total_bytes_len {
215            //     assert_eq!(actual_total_bytes_len, len);
216            // }
217        }
218
219        Self {
220            dtype,
221            views,
222            buffers,
223            validity,
224            phantom: Default::default(),
225            total_bytes_len: RelaxedCell::from(
226                total_bytes_len.map(|l| l as u64).unwrap_or(UNKNOWN_LEN),
227            ),
228            total_buffer_len,
229        }
230    }
231
232    /// Create a new BinaryViewArray but initialize a statistics compute.
233    ///
234    /// # Safety
235    /// The caller must ensure the invariants
236    pub unsafe fn new_unchecked_unknown_md(
237        dtype: ArrowDataType,
238        views: Buffer<View>,
239        buffers: Buffer<Buffer<u8>>,
240        validity: Option<Bitmap>,
241        total_buffer_len: Option<usize>,
242    ) -> Self {
243        let total_bytes_len = None;
244        let total_buffer_len =
245            total_buffer_len.unwrap_or_else(|| buffers.iter().map(|b| b.len()).sum());
246        Self::new_unchecked(
247            dtype,
248            views,
249            buffers,
250            validity,
251            total_bytes_len,
252            total_buffer_len,
253        )
254    }
255
256    pub fn data_buffers(&self) -> &Buffer<Buffer<u8>> {
257        &self.buffers
258    }
259
260    pub fn data_buffers_mut(&mut self) -> &mut Buffer<Buffer<u8>> {
261        &mut self.buffers
262    }
263
264    pub fn variadic_buffer_lengths(&self) -> Vec<i64> {
265        self.buffers.iter().map(|buf| buf.len() as i64).collect()
266    }
267
268    pub fn views(&self) -> &Buffer<View> {
269        &self.views
270    }
271
272    pub fn into_views(self) -> Vec<View> {
273        self.views.to_vec()
274    }
275
276    pub fn into_inner(
277        self,
278    ) -> (
279        Buffer<View>,
280        Buffer<Buffer<u8>>,
281        Option<Bitmap>,
282        Option<usize>,
283        usize,
284    ) {
285        let total_bytes_len = self.try_total_bytes_len();
286        let views = self.views;
287        let buffers = self.buffers;
288        let validity = self.validity;
289
290        (
291            views,
292            buffers,
293            validity,
294            total_bytes_len,
295            self.total_buffer_len,
296        )
297    }
298
299    /// Apply a function over the views. This can be used to update views in operations like slicing.
300    ///
301    /// # Safety
302    /// All invariants of the views must be maintained.
303    pub unsafe fn apply_views<F: FnMut(View, &T) -> View>(&self, mut update_view: F) -> Self {
304        let arr = self.clone();
305        let (views, buffers, validity, _total_bytes_len, total_buffer_len) = arr.into_inner();
306
307        let mut total_bytes_len = 0;
308        let mut views = views.to_vec();
309        for v in views.iter_mut() {
310            let str_slice = T::from_bytes_unchecked(v.get_slice_unchecked(&buffers));
311            *v = update_view(*v, str_slice);
312            total_bytes_len += v.length as usize;
313        }
314
315        let len_valid = validity.is_none();
316        Self::new_unchecked(
317            self.dtype.clone(),
318            views.into(),
319            buffers,
320            validity,
321            len_valid.then_some(total_bytes_len),
322            total_buffer_len,
323        )
324    }
325
326    /// Apply a function to the views as a mutable slice.
327    ///
328    /// # Safety
329    /// All invariants of the views must be maintained.
330    pub unsafe fn with_views_mut<F: FnOnce(&mut [View])>(&mut self, f: F) {
331        self.total_bytes_len.store(UNKNOWN_LEN);
332        if let Some(views) = self.views.get_mut_slice() {
333            f(views)
334        } else {
335            let mut views = self.views.as_slice().to_vec();
336            f(&mut views);
337            self.views = Buffer::from(views);
338        }
339    }
340
341    pub fn try_new(
342        dtype: ArrowDataType,
343        views: Buffer<View>,
344        buffers: Buffer<Buffer<u8>>,
345        validity: Option<Bitmap>,
346    ) -> PolarsResult<Self> {
347        if T::IS_UTF8 {
348            validate_utf8_views(views.as_ref(), buffers.as_ref())?;
349        } else {
350            validate_binary_views(views.as_ref(), buffers.as_ref())?;
351        }
352
353        if let Some(validity) = &validity {
354            polars_ensure!(validity.len()== views.len(), ComputeError: "validity mask length must match the number of values" )
355        }
356
357        unsafe {
358            Ok(Self::new_unchecked_unknown_md(
359                dtype, views, buffers, validity, None,
360            ))
361        }
362    }
363
364    /// Creates an empty [`BinaryViewArrayGeneric`], i.e. whose `.len` is zero.
365    #[inline]
366    pub fn new_empty(dtype: ArrowDataType) -> Self {
367        unsafe { Self::new_unchecked(dtype, Buffer::new(), Buffer::new(), None, Some(0), 0) }
368    }
369
370    /// Returns a new null [`BinaryViewArrayGeneric`] of `length`.
371    #[inline]
372    pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
373        let validity = Some(Bitmap::new_zeroed(length));
374        unsafe {
375            Self::new_unchecked(
376                dtype,
377                Buffer::zeroed(length),
378                Buffer::new(),
379                validity,
380                Some(0),
381                0,
382            )
383        }
384    }
385
386    /// Returns the element at index `i`
387    /// # Panics
388    /// iff `i >= self.len()`
389    #[inline]
390    pub fn value(&self, i: usize) -> &T {
391        assert!(i < self.len());
392        unsafe { self.value_unchecked(i) }
393    }
394
395    /// Returns the element at index `i`
396    ///
397    /// # Safety
398    /// Assumes that the `i < self.len`.
399    #[inline]
400    pub unsafe fn value_unchecked(&self, i: usize) -> &T {
401        let v = self.views.get_unchecked(i);
402        T::from_bytes_unchecked(v.get_slice_unchecked(&self.buffers))
403    }
404
405    /// Returns the element at index `i`, or None if it is null.
406    /// # Panics
407    /// iff `i >= self.len()`
408    #[inline]
409    pub fn get(&self, i: usize) -> Option<&T> {
410        assert!(i < self.len());
411        unsafe { self.get_unchecked(i) }
412    }
413
414    /// Returns the element at index `i`, or None if it is null.
415    ///
416    /// # Safety
417    /// Assumes that the `i < self.len`.
418    #[inline]
419    pub unsafe fn get_unchecked(&self, i: usize) -> Option<&T> {
420        if self
421            .validity
422            .as_ref()
423            .is_none_or(|v| v.get_bit_unchecked(i))
424        {
425            let v = self.views.get_unchecked(i);
426            Some(T::from_bytes_unchecked(
427                v.get_slice_unchecked(&self.buffers),
428            ))
429        } else {
430            None
431        }
432    }
433
434    /// Returns an iterator of `Option<&T>` over every element of this array.
435    pub fn iter(&self) -> ZipValidity<&T, BinaryViewValueIter<'_, T>, BitmapIter<'_>> {
436        ZipValidity::new_with_validity(self.values_iter(), self.validity.as_ref())
437    }
438
439    /// Returns an iterator of `&[u8]` over every element of this array, ignoring the validity
440    pub fn values_iter(&self) -> BinaryViewValueIter<'_, T> {
441        BinaryViewValueIter::new(self)
442    }
443
444    pub fn len_iter(&self) -> impl Iterator<Item = u32> + '_ {
445        self.views.iter().map(|v| v.length)
446    }
447
448    /// Returns an iterator of the non-null values.
449    pub fn non_null_values_iter(&self) -> NonNullValuesIter<'_, BinaryViewArrayGeneric<T>> {
450        NonNullValuesIter::new(self, self.validity())
451    }
452
453    /// Returns an iterator of the non-null values.
454    pub fn non_null_views_iter(&self) -> NonNullValuesIter<'_, Buffer<View>> {
455        NonNullValuesIter::new(self.views(), self.validity())
456    }
457
458    impl_sliced!();
459    impl_into_array!();
460
461    /// Returns this array with a new validity.
462    /// # Panic
463    /// Panics iff `validity.len() != self.len()`.
464    #[must_use]
465    #[inline]
466    pub fn with_validity(mut self, validity: Option<Bitmap>) -> Self {
467        self.set_validity(validity);
468        self
469    }
470
471    /// Sets the validity of this array.
472    /// # Panics
473    /// This function panics iff `values.len() != self.len()`.
474    #[inline]
475    pub fn set_validity(&mut self, validity: Option<Bitmap>) {
476        if matches!(&validity, Some(bitmap) if bitmap.len() != self.len()) {
477            panic!("validity must be equal to the array's length")
478        }
479        self.total_bytes_len.store(UNKNOWN_LEN);
480        self.validity = validity;
481    }
482
483    /// Takes the validity of this array, leaving it without a validity mask.
484    #[inline]
485    pub fn take_validity(&mut self) -> Option<Bitmap> {
486        self.total_bytes_len.store(UNKNOWN_LEN);
487        self.validity.take()
488    }
489
490    pub fn from_slice<S: AsRef<T>, P: AsRef<[Option<S>]>>(slice: P) -> Self {
491        let mutable = MutableBinaryViewArray::from_iterator(
492            slice.as_ref().iter().map(|opt_v| opt_v.as_ref()),
493        );
494        mutable.into()
495    }
496
497    pub fn from_slice_values<S: AsRef<T>, P: AsRef<[S]>>(slice: P) -> Self {
498        let mutable =
499            MutableBinaryViewArray::from_values_iter(slice.as_ref().iter().map(|v| v.as_ref()));
500        mutable.into()
501    }
502
503    /// Get the total length of bytes that it would take to concatenate all binary/str values in this array.
504    pub fn total_bytes_len(&self) -> usize {
505        let total = self.total_bytes_len.load();
506        if total == UNKNOWN_LEN {
507            let total = ZipValidity::new_with_validity(self.len_iter(), self.validity.as_ref())
508                .map(|v| v.unwrap_or(0) as usize)
509                .sum::<usize>();
510            self.total_bytes_len.store(total as u64);
511            total
512        } else {
513            total as usize
514        }
515    }
516
517    /// Like total_bytes_len() but if unavailable will not force a computation.
518    pub fn try_total_bytes_len(&self) -> Option<usize> {
519        let b = self.total_bytes_len.load();
520        (b != UNKNOWN_LEN).then_some(b as usize)
521    }
522
523    /// Get the length of bytes that are stored in the variadic buffers.
524    pub fn total_buffer_len(&self) -> usize {
525        self.total_buffer_len
526    }
527
528    fn total_unshared_buffer_len(&self) -> usize {
529        // XXX: it is O(n), not O(1).
530        // Given this function is only called in `maybe_gc()`,
531        // it may not be worthy to add an extra field for this.
532        self.buffers
533            .iter()
534            .map(|buf| {
535                if buf.storage_refcount() > 1 {
536                    0
537                } else {
538                    buf.len()
539                }
540            })
541            .sum()
542    }
543
544    #[inline(always)]
545    pub fn len(&self) -> usize {
546        self.views.len()
547    }
548
549    /// Garbage collect
550    pub fn gc(self) -> Self {
551        if self.buffers.is_empty() {
552            return self;
553        }
554        let mut mutable = MutableBinaryViewArray::with_capacity(self.len());
555        let buffers = self.buffers.as_ref();
556
557        for view in self.views.as_ref() {
558            unsafe { mutable.push_view_unchecked(*view, buffers) }
559        }
560        mutable.freeze().with_validity(self.validity)
561    }
562
563    pub fn deshare(&self) -> Self {
564        if self.buffers.storage_refcount() == 1
565            && self.buffers.iter().all(|b| b.storage_refcount() == 1)
566        {
567            return self.clone();
568        }
569        self.clone().gc()
570    }
571
572    pub fn is_sliced(&self) -> bool {
573        !std::ptr::eq(self.views.as_ptr(), self.views.storage_ptr())
574    }
575
576    pub fn maybe_gc(self) -> Self {
577        const GC_MINIMUM_SAVINGS: usize = 16 * 1024; // At least 16 KiB.
578
579        if self.total_buffer_len <= GC_MINIMUM_SAVINGS {
580            return self;
581        }
582
583        if self.buffers.storage_refcount() != 1 {
584            // There are multiple holders of this `buffers`.
585            // If we allow gc in this case,
586            // it may end up copying the same content multiple times.
587            return self;
588        }
589
590        // Subtract the maximum amount of inlined strings to get a lower bound
591        // on the number of buffer bytes needed (assuming no dedup).
592        let total_bytes_len = self.total_bytes_len();
593        let buffer_req_lower_bound = total_bytes_len.saturating_sub(self.len() * 12);
594
595        let lower_bound_mem_usage_post_gc = self.len() * 16 + buffer_req_lower_bound;
596        // Use unshared buffer len. Shared buffer won't be freed; no savings.
597        let cur_mem_usage = self.len() * 16 + self.total_unshared_buffer_len();
598        let savings_upper_bound = cur_mem_usage.saturating_sub(lower_bound_mem_usage_post_gc);
599
600        if savings_upper_bound >= GC_MINIMUM_SAVINGS
601            && cur_mem_usage >= 4 * lower_bound_mem_usage_post_gc
602        {
603            self.gc()
604        } else {
605            self
606        }
607    }
608
609    pub fn make_mut(self) -> MutableBinaryViewArray<T> {
610        let views = self.views.to_vec();
611        let completed_buffers = self.buffers.to_vec();
612        let validity = self.validity.map(|bitmap| bitmap.make_mut());
613
614        // We need to know the total_bytes_len if we are going to mutate it.
615        let mut total_bytes_len = self.total_bytes_len.load();
616        if total_bytes_len == UNKNOWN_LEN {
617            total_bytes_len = views.iter().map(|view| view.length as u64).sum();
618        }
619        let total_bytes_len = total_bytes_len as usize;
620
621        MutableBinaryViewArray {
622            views,
623            completed_buffers,
624            in_progress_buffer: vec![],
625            validity,
626            phantom: Default::default(),
627            total_bytes_len,
628            total_buffer_len: self.total_buffer_len,
629            stolen_buffers: PlHashMap::new(),
630        }
631    }
632}
633
634impl BinaryViewArray {
635    /// Validate the underlying bytes on UTF-8.
636    pub fn validate_utf8(&self) -> PolarsResult<()> {
637        // SAFETY: views are correct
638        unsafe { validate_views_utf8_only(&self.views, &self.buffers, 0) }
639    }
640
641    /// Convert [`BinaryViewArray`] to [`Utf8ViewArray`].
642    pub fn to_utf8view(&self) -> PolarsResult<Utf8ViewArray> {
643        self.validate_utf8()?;
644        unsafe { Ok(self.to_utf8view_unchecked()) }
645    }
646
647    /// Convert [`BinaryViewArray`] to [`Utf8ViewArray`] without checking UTF-8.
648    ///
649    /// # Safety
650    /// The caller must ensure the underlying data is valid UTF-8.
651    pub unsafe fn to_utf8view_unchecked(&self) -> Utf8ViewArray {
652        Utf8ViewArray::new_unchecked(
653            ArrowDataType::Utf8View,
654            self.views.clone(),
655            self.buffers.clone(),
656            self.validity.clone(),
657            self.try_total_bytes_len(),
658            self.total_buffer_len,
659        )
660    }
661}
662
663impl Utf8ViewArray {
664    pub fn to_binview(&self) -> BinaryViewArray {
665        // SAFETY: same invariants.
666        unsafe {
667            BinaryViewArray::new_unchecked(
668                ArrowDataType::BinaryView,
669                self.views.clone(),
670                self.buffers.clone(),
671                self.validity.clone(),
672                self.try_total_bytes_len(),
673                self.total_buffer_len,
674            )
675        }
676    }
677}
678
679impl<T: ViewType + ?Sized> Array for BinaryViewArrayGeneric<T> {
680    fn as_any(&self) -> &dyn Any {
681        self
682    }
683
684    fn as_any_mut(&mut self) -> &mut dyn Any {
685        self
686    }
687
688    #[inline(always)]
689    fn len(&self) -> usize {
690        BinaryViewArrayGeneric::len(self)
691    }
692
693    #[inline(always)]
694    fn dtype(&self) -> &ArrowDataType {
695        &self.dtype
696    }
697
698    #[inline(always)]
699    fn dtype_mut(&mut self) -> &mut ArrowDataType {
700        &mut self.dtype
701    }
702
703    fn validity(&self) -> Option<&Bitmap> {
704        self.validity.as_ref()
705    }
706
707    fn split_at_boxed(&self, offset: usize) -> (Box<dyn Array>, Box<dyn Array>) {
708        let (lhs, rhs) = Splitable::split_at(self, offset);
709        (Box::new(lhs), Box::new(rhs))
710    }
711
712    unsafe fn split_at_boxed_unchecked(&self, offset: usize) -> (Box<dyn Array>, Box<dyn Array>) {
713        let (lhs, rhs) = unsafe { Splitable::split_at_unchecked(self, offset) };
714        (Box::new(lhs), Box::new(rhs))
715    }
716
717    fn slice(&mut self, offset: usize, length: usize) {
718        assert!(
719            offset + length <= self.len(),
720            "the offset of the new Buffer cannot exceed the existing length"
721        );
722        unsafe { self.slice_unchecked(offset, length) }
723    }
724
725    unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
726        debug_assert!(offset + length <= self.len());
727        self.validity = self
728            .validity
729            .take()
730            .map(|bitmap| bitmap.sliced_unchecked(offset, length))
731            .filter(|bitmap| bitmap.unset_bits() > 0);
732        self.views.slice_in_place_unchecked(offset..offset + length);
733        self.total_bytes_len.store(UNKNOWN_LEN)
734    }
735
736    fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
737        debug_assert!(
738            validity.as_ref().is_none_or(|v| v.len() == self.len()),
739            "{} != {}",
740            validity.as_ref().unwrap().len(),
741            self.len()
742        );
743
744        let mut new = self.clone();
745        new.validity = validity;
746        Box::new(new)
747    }
748
749    fn to_boxed(&self) -> Box<dyn Array> {
750        Box::new(self.clone())
751    }
752}
753
754impl<T: ViewType + ?Sized> Splitable for BinaryViewArrayGeneric<T> {
755    fn check_bound(&self, offset: usize) -> bool {
756        offset <= self.len()
757    }
758
759    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
760        let (lhs_views, rhs_views) = unsafe { self.views.split_at_unchecked(offset) };
761        let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };
762
763        unsafe {
764            (
765                Self::new_unchecked(
766                    self.dtype.clone(),
767                    lhs_views,
768                    self.buffers.clone(),
769                    lhs_validity,
770                    (offset == 0).then_some(0),
771                    self.total_buffer_len(),
772                ),
773                Self::new_unchecked(
774                    self.dtype.clone(),
775                    rhs_views,
776                    self.buffers.clone(),
777                    rhs_validity,
778                    (offset == self.len()).then_some(0),
779                    self.total_buffer_len(),
780                ),
781            )
782        }
783    }
784}