polars_arrow/array/binview/mutable.rs

use std::any::Any;
use std::fmt::{Debug, Formatter};
use std::ops::Deref;
use std::sync::Arc;

use hashbrown::hash_map::Entry;
use polars_error::PolarsResult;
use polars_utils::aliases::{InitHashMaps, PlHashMap};

use crate::array::binview::iterator::MutableBinaryViewValueIter;
use crate::array::binview::view::validate_views_utf8_only;
use crate::array::binview::{
    BinaryViewArrayGeneric, DEFAULT_BLOCK_SIZE, MAX_EXP_BLOCK_SIZE, ViewType,
};
use crate::array::{Array, MutableArray, TryExtend, TryPush, View};
use crate::bitmap::MutableBitmap;
use crate::buffer::Buffer;
use crate::datatypes::ArrowDataType;
use crate::legacy::trusted_len::TrustedLenPush;
use crate::trusted_len::TrustedLen;

// Invariants:
//
// - Each view must point to a valid slice of a buffer
// - `total_buffer_len` must be equal to `completed_buffers.iter().map(Vec::len).sum()`
// - `total_bytes_len` must be equal to `views.iter().map(View::len).sum()`
pub struct MutableBinaryViewArray<T: ViewType + ?Sized> {
    pub(crate) views: Vec<View>,
    pub(crate) completed_buffers: Vec<Buffer<u8>>,
    pub(crate) in_progress_buffer: Vec<u8>,
    pub(crate) validity: Option<MutableBitmap>,
    pub(crate) phantom: std::marker::PhantomData<T>,
    /// Total length in bytes if we were to concatenate all values.
    pub(crate) total_bytes_len: usize,
    /// Total bytes in the buffers (excluding remaining capacity).
    pub(crate) total_buffer_len: usize,
    /// Mapping from `Buffer::deref()` to index in `completed_buffers`.
    /// Used in `push_view()`.
    pub(crate) stolen_buffers: PlHashMap<usize, u32>,
}

impl<T: ViewType + ?Sized> Clone for MutableBinaryViewArray<T> {
    fn clone(&self) -> Self {
        Self {
            views: self.views.clone(),
            completed_buffers: self.completed_buffers.clone(),
            in_progress_buffer: self.in_progress_buffer.clone(),
            validity: self.validity.clone(),
            phantom: Default::default(),
            total_bytes_len: self.total_bytes_len,
            total_buffer_len: self.total_buffer_len,
            stolen_buffers: PlHashMap::new(),
        }
    }
}

impl<T: ViewType + ?Sized> Debug for MutableBinaryViewArray<T> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        write!(f, "mutable-binview{:?}", T::DATA_TYPE)
    }
}

impl<T: ViewType + ?Sized> Default for MutableBinaryViewArray<T> {
    fn default() -> Self {
        Self::with_capacity(0)
    }
}

impl<T: ViewType + ?Sized> From<MutableBinaryViewArray<T>> for BinaryViewArrayGeneric<T> {
    fn from(mut value: MutableBinaryViewArray<T>) -> Self {
        value.finish_in_progress();
        unsafe {
            Self::new_unchecked(
                T::DATA_TYPE,
                value.views.into(),
                Arc::from(value.completed_buffers),
                value.validity.map(|b| b.into()),
                value.total_bytes_len,
                value.total_buffer_len,
            )
        }
    }
}
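
// A minimal usage sketch (illustrative, not part of the original source): build a
// mutable UTF-8 view array and freeze it into an immutable `BinaryViewArrayGeneric`.
//
//     let mut arr = MutableBinaryViewArray::<str>::with_capacity(3);
//     arr.push_value("short");
//     arr.push(Some("a string that is far too long to be inlined in a view"));
//     arr.push::<&str>(None);
//     let frozen: BinaryViewArrayGeneric<str> = arr.freeze();
//     assert_eq!(frozen.len(), 3);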

impl<T: ViewType + ?Sized> MutableBinaryViewArray<T> {
    pub fn new() -> Self {
        Self::default()
    }

    pub fn with_capacity(capacity: usize) -> Self {
        Self {
            views: Vec::with_capacity(capacity),
            completed_buffers: vec![],
            in_progress_buffer: vec![],
            validity: None,
            phantom: Default::default(),
            total_buffer_len: 0,
            total_bytes_len: 0,
            stolen_buffers: PlHashMap::new(),
        }
    }

    /// Get a mutable reference to the [`Vec`] of [`View`]s in this [`MutableBinaryViewArray`].
    ///
    /// # Safety
    ///
    /// This is safe as long as any mutation of the [`Vec`] does not break any invariants of the
    /// [`MutableBinaryViewArray`] before it is read again.
    #[inline]
    pub unsafe fn views_mut(&mut self) -> &mut Vec<View> {
        &mut self.views
    }

    /// Set the `total_bytes_len` of the [`MutableBinaryViewArray`].
    ///
    /// # Safety
    ///
    /// This should not break invariants of the [`MutableBinaryViewArray`].
    #[inline]
    pub unsafe fn set_total_bytes_len(&mut self, value: usize) {
        #[cfg(debug_assertions)]
        {
            let actual_length: usize = self.views().iter().map(|v| v.length as usize).sum();
            assert_eq!(value, actual_length);
        }

        self.total_bytes_len = value;
    }

    pub fn total_bytes_len(&self) -> usize {
        self.total_bytes_len
    }

    pub fn total_buffer_len(&self) -> usize {
        self.total_buffer_len
    }

    #[inline]
    pub fn views(&self) -> &[View] {
        &self.views
    }

    #[inline]
    pub fn completed_buffers(&self) -> &[Buffer<u8>] {
        &self.completed_buffers
    }

    pub fn validity(&mut self) -> Option<&mut MutableBitmap> {
        self.validity.as_mut()
    }

    /// Reserves capacity for `additional` more views.
    pub fn reserve(&mut self, additional: usize) {
        self.views.reserve(additional);
    }

    #[inline]
    pub fn len(&self) -> usize {
        self.views.len()
    }

    #[inline]
    pub fn capacity(&self) -> usize {
        self.views.capacity()
    }

    fn init_validity(&mut self, unset_last: bool) {
        let mut validity = MutableBitmap::with_capacity(self.views.capacity());
        validity.extend_constant(self.len(), true);
        if unset_last {
            validity.set(self.len() - 1, false);
        }
        self.validity = Some(validity);
    }

    /// # Safety
    /// - caller must allocate enough capacity
    /// - caller must ensure the view and buffers match.
    /// - The array must not have validity.
    pub(crate) unsafe fn push_view_unchecked(&mut self, v: View, buffers: &[Buffer<u8>]) {
        let len = v.length;
        self.total_bytes_len += len as usize;
        if len <= 12 {
            debug_assert!(self.views.capacity() > self.views.len());
            self.views.push_unchecked(v)
        } else {
            self.total_buffer_len += len as usize;
            let data = buffers.get_unchecked(v.buffer_idx as usize);
            let offset = v.offset as usize;
            let bytes = data.get_unchecked(offset..offset + len as usize);
            let t = T::from_bytes_unchecked(bytes);
            self.push_value_ignore_validity(t)
        }
    }

    /// # Safety
    /// - caller must allocate enough capacity
    /// - caller must ensure the view and buffers match.
    /// - The array must not have validity.
    /// - caller must not mix use of this function with other push functions.
    pub unsafe fn push_view_unchecked_dedupe(&mut self, mut v: View, buffers: &[Buffer<u8>]) {
        let len = v.length;
        self.total_bytes_len += len as usize;
        if len <= 12 {
            self.views.push_unchecked(v);
        } else {
            let buffer = buffers.get_unchecked(v.buffer_idx as usize);
            let idx = match self.stolen_buffers.entry(buffer.deref().as_ptr() as usize) {
                Entry::Occupied(entry) => *entry.get(),
                Entry::Vacant(entry) => {
                    let idx = self.completed_buffers.len() as u32;
                    entry.insert(idx);
                    self.completed_buffers.push(buffer.clone());
                    self.total_buffer_len += buffer.len();
                    idx
                },
            };
            v.buffer_idx = idx;
            self.views.push_unchecked(v);
        }
    }

    pub fn push_view(&mut self, mut v: View, buffers: &[Buffer<u8>]) {
        let len = v.length;
        self.total_bytes_len += len as usize;
        if len <= 12 {
            self.views.push(v);
        } else {
            // Do not mix use of push_view and push_value_ignore_validity -
            // it causes fragmentation.
            self.finish_in_progress();

            let buffer = &buffers[v.buffer_idx as usize];
            let idx = match self.stolen_buffers.entry(buffer.deref().as_ptr() as usize) {
                Entry::Occupied(entry) => {
                    let idx = *entry.get();
                    let target_buffer = &self.completed_buffers[idx as usize];
                    debug_assert_eq!(buffer, target_buffer);
                    idx
                },
                Entry::Vacant(entry) => {
                    let idx = self.completed_buffers.len() as u32;
                    entry.insert(idx);
                    self.completed_buffers.push(buffer.clone());
                    self.total_buffer_len += buffer.len();
                    idx
                },
            };
            v.buffer_idx = idx;
            self.views.push(v);
        }
        if let Some(validity) = &mut self.validity {
            validity.push(true)
        }
    }
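
    // Illustrative sketch (not part of the original source): `push_view` lets a
    // caller re-use ("steal") the data buffers of an existing view array instead of
    // copying the bytes. The first long view coming from a given source buffer clones
    // that refcounted `Buffer` into `completed_buffers`; later views from the same
    // buffer hit `stolen_buffers` and only get their `buffer_idx` remapped.
    //
    //     // `src` is a hypothetical, already-built view array.
    //     let mut dst = MutableBinaryViewArray::<[u8]>::new();
    //     for view in src.views().iter() {
    //         dst.push_view(*view, src.data_buffers());
    //     }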

    #[inline]
    pub fn push_value_ignore_validity<V: AsRef<T>>(&mut self, value: V) {
        let bytes = value.as_ref().to_bytes();
        self.total_bytes_len += bytes.len();

        // A string can be at most 4GB in size.
        let len = u32::try_from(bytes.len()).unwrap();

        let view = if len <= View::MAX_INLINE_SIZE {
            View::new_inline(bytes)
        } else {
            self.total_buffer_len += bytes.len();

            // We want to make sure that we never have to memcopy between buffers. So if the
            // current buffer is not large enough, create a new buffer that is large enough and try
            // to anticipate the larger size.
            let required_capacity = self.in_progress_buffer.len() + bytes.len();
            let does_not_fit_in_buffer = self.in_progress_buffer.capacity() < required_capacity;

            // We can only save offsets that are below u32::MAX
            let offset_will_not_fit = self.in_progress_buffer.len() > u32::MAX as usize;

            if does_not_fit_in_buffer || offset_will_not_fit {
                // Allocate a new buffer and flush the old buffer
                let new_capacity = (self.in_progress_buffer.capacity() * 2)
                    .clamp(DEFAULT_BLOCK_SIZE, MAX_EXP_BLOCK_SIZE)
                    .max(bytes.len());
                let in_progress = Vec::with_capacity(new_capacity);
                let flushed = std::mem::replace(&mut self.in_progress_buffer, in_progress);
                if !flushed.is_empty() {
                    self.completed_buffers.push(flushed.into())
                }
            }

            let offset = self.in_progress_buffer.len() as u32;
            self.in_progress_buffer.extend_from_slice(bytes);

            let buffer_idx = u32::try_from(self.completed_buffers.len()).unwrap();

            View::new_from_bytes(bytes, buffer_idx, offset)
        };

        self.views.push(view);
    }
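
    // Worked example of the accounting above (illustrative, not from the original
    // source): values of at most `View::MAX_INLINE_SIZE` (12) bytes are stored
    // entirely inside the 16-byte view and never touch the buffers, while longer
    // values are appended to `in_progress_buffer` and also counted in
    // `total_buffer_len`.
    //
    //     let mut arr = MutableBinaryViewArray::<[u8]>::new();
    //     arr.push_value(b"tiny");                  // 4 bytes, inlined in the view
    //     arr.push_value(b"a twenty byte value!");  // 20 bytes, spilled to the buffer
    //     assert_eq!(arr.total_bytes_len(), 24);    // logical bytes of all values
    //     assert_eq!(arr.total_buffer_len(), 20);   // only the non-inlined bytes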

    #[inline]
    pub fn push_buffer(&mut self, buffer: Buffer<u8>) -> u32 {
        self.finish_in_progress();

        let buffer_idx = self.completed_buffers.len();
        self.total_buffer_len += buffer.len();
        self.completed_buffers.push(buffer);
        buffer_idx as u32
    }

    #[inline]
    pub fn push_value<V: AsRef<T>>(&mut self, value: V) {
        if let Some(validity) = &mut self.validity {
            validity.push(true)
        }
        self.push_value_ignore_validity(value)
    }

    #[inline]
    pub fn push<V: AsRef<T>>(&mut self, value: Option<V>) {
        if let Some(value) = value {
            self.push_value(value)
        } else {
            self.push_null()
        }
    }

    #[inline]
    pub fn push_null(&mut self) {
        self.views.push(View::default());
        match &mut self.validity {
            Some(validity) => validity.push(false),
            None => self.init_validity(true),
        }
    }

    pub fn extend_null(&mut self, additional: usize) {
        if self.validity.is_none() && additional > 0 {
            self.init_validity(false);
        }
        self.views
            .extend(std::iter::repeat_n(View::default(), additional));
        if let Some(validity) = &mut self.validity {
            validity.extend_constant(additional, false);
        }
    }

    pub fn extend_constant<V: AsRef<T>>(&mut self, additional: usize, value: Option<V>) {
        if value.is_none() && self.validity.is_none() {
            self.init_validity(false);
        }

        if let Some(validity) = &mut self.validity {
            validity.extend_constant(additional, value.is_some())
        }

        // Push and pop to get the properly encoded value.
        // For long strings this leads to a dictionary-like encoding,
        // as we push the string only once into the buffers.
        let view_value = value
            .map(|v| {
                self.push_value_ignore_validity(v);
                self.views.pop().unwrap()
            })
            .unwrap_or_default();
        self.views
            .extend(std::iter::repeat_n(view_value, additional));
    }
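
    // Illustrative sketch (not from the original source): extending with a constant
    // non-inlinable value writes the payload bytes once and repeats the same view,
    // so the buffers grow by one payload, not by `additional` payloads.
    //
    //     let mut arr = MutableBinaryViewArray::<str>::new();
    //     let s = "a long repeated string that is not inlinable";
    //     arr.extend_constant(1_000, Some(s));
    //     assert_eq!(arr.len(), 1_000);
    //     assert_eq!(arr.total_buffer_len(), s.len()); // payload stored only once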

    impl_mutable_array_mut_validity!();

    #[inline]
    pub fn extend_values<I, P>(&mut self, iterator: I)
    where
        I: Iterator<Item = P>,
        P: AsRef<T>,
    {
        self.reserve(iterator.size_hint().0);
        for v in iterator {
            self.push_value(v)
        }
    }

    #[inline]
    pub fn extend_trusted_len_values<I, P>(&mut self, iterator: I)
    where
        I: TrustedLen<Item = P>,
        P: AsRef<T>,
    {
        self.extend_values(iterator)
    }

    #[inline]
    pub fn extend<I, P>(&mut self, iterator: I)
    where
        I: Iterator<Item = Option<P>>,
        P: AsRef<T>,
    {
        self.reserve(iterator.size_hint().0);
        for p in iterator {
            self.push(p)
        }
    }

    #[inline]
    pub fn extend_trusted_len<I, P>(&mut self, iterator: I)
    where
        I: TrustedLen<Item = Option<P>>,
        P: AsRef<T>,
    {
        self.extend(iterator)
    }

    #[inline]
    pub fn extend_views<I>(&mut self, iterator: I, buffers: &[Buffer<u8>])
    where
        I: Iterator<Item = Option<View>>,
    {
        self.reserve(iterator.size_hint().0);
        for p in iterator {
            match p {
                Some(v) => self.push_view(v, buffers),
                None => self.push_null(),
            }
        }
    }

    #[inline]
    pub fn extend_views_trusted_len<I>(&mut self, iterator: I, buffers: &[Buffer<u8>])
    where
        I: TrustedLen<Item = Option<View>>,
    {
        self.extend_views(iterator, buffers);
    }

    #[inline]
    pub fn extend_non_null_views<I>(&mut self, iterator: I, buffers: &[Buffer<u8>])
    where
        I: Iterator<Item = View>,
    {
        self.reserve(iterator.size_hint().0);
        for v in iterator {
            self.push_view(v, buffers);
        }
    }

    #[inline]
    pub fn extend_non_null_views_trusted_len<I>(&mut self, iterator: I, buffers: &[Buffer<u8>])
    where
        I: TrustedLen<Item = View>,
    {
        self.extend_non_null_views(iterator, buffers);
    }

    /// # Safety
    /// Same as `push_view_unchecked()`.
    #[inline]
    pub unsafe fn extend_non_null_views_unchecked<I>(&mut self, iterator: I, buffers: &[Buffer<u8>])
    where
        I: Iterator<Item = View>,
    {
        self.reserve(iterator.size_hint().0);
        for v in iterator {
            self.push_view_unchecked(v, buffers);
        }
    }

    /// # Safety
    /// Same as `push_view_unchecked()`.
    #[inline]
    pub unsafe fn extend_non_null_views_unchecked_dedupe<I>(
        &mut self,
        iterator: I,
        buffers: &[Buffer<u8>],
    ) where
        I: Iterator<Item = View>,
    {
        self.reserve(iterator.size_hint().0);
        for v in iterator {
            self.push_view_unchecked_dedupe(v, buffers);
        }
    }

    #[inline]
    pub fn from_iterator<I, P>(iterator: I) -> Self
    where
        I: Iterator<Item = Option<P>>,
        P: AsRef<T>,
    {
        let mut mutable = Self::with_capacity(iterator.size_hint().0);
        mutable.extend(iterator);
        mutable
    }

    pub fn from_values_iter<I, P>(iterator: I) -> Self
    where
        I: Iterator<Item = P>,
        P: AsRef<T>,
    {
        let mut mutable = Self::with_capacity(iterator.size_hint().0);
        mutable.extend_values(iterator);
        mutable
    }

    pub fn from<S: AsRef<T>, P: AsRef<[Option<S>]>>(slice: P) -> Self {
        Self::from_iterator(slice.as_ref().iter().map(|opt_v| opt_v.as_ref()))
    }

    pub fn finish_in_progress(&mut self) -> bool {
        if !self.in_progress_buffer.is_empty() {
            self.completed_buffers
                .push(std::mem::take(&mut self.in_progress_buffer).into());
            true
        } else {
            false
        }
    }

    #[inline]
    pub fn freeze(self) -> BinaryViewArrayGeneric<T> {
        self.into()
    }

    #[inline]
    pub fn freeze_with_dtype(self, dtype: ArrowDataType) -> BinaryViewArrayGeneric<T> {
        let mut arr: BinaryViewArrayGeneric<T> = self.into();
        arr.dtype = dtype;
        arr
    }

    pub fn take(self) -> (Vec<View>, Vec<Buffer<u8>>) {
        (self.views, self.completed_buffers)
    }

    #[inline]
    pub fn value(&self, i: usize) -> &T {
        assert!(i < self.len());
        unsafe { self.value_unchecked(i) }
    }

    /// Returns the element at index `i`
    ///
    /// # Safety
    /// Assumes that `i < self.len()`.
    #[inline]
    pub unsafe fn value_unchecked(&self, i: usize) -> &T {
        self.value_from_view_unchecked(self.views.get_unchecked(i))
    }

    /// Returns the element indicated by the given view.
    ///
    /// # Safety
    /// Assumes the View belongs to this MutableBinaryViewArray.
    pub unsafe fn value_from_view_unchecked<'a>(&'a self, view: &'a View) -> &'a T {
        // View layout:
        // length: 4 bytes
        // prefix: 4 bytes
        // buffer_index: 4 bytes
        // offset: 4 bytes

        // Inlined layout:
        // length: 4 bytes
        // data: 12 bytes
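
        // Worked example of these layouts (illustrative, assuming the standard
        // 16-byte Arrow view encoding): the 5-byte value b"hello" is stored inline
        // as [len = 5 | b"hello" | 7 padding bytes], while a 20-byte value stored at
        // offset 64 of buffer 2 is encoded as
        // [len = 20 | first 4 bytes of the value | buffer_idx = 2 | offset = 64].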
        let len = view.length;
        let bytes = if len <= 12 {
            let ptr = view as *const View as *const u8;
            std::slice::from_raw_parts(ptr.add(4), len as usize)
        } else {
            let buffer_idx = view.buffer_idx as usize;
            let offset = view.offset;

            let data = if buffer_idx == self.completed_buffers.len() {
                self.in_progress_buffer.as_slice()
            } else {
                self.completed_buffers.get_unchecked(buffer_idx)
            };

            let offset = offset as usize;
            data.get_unchecked(offset..offset + len as usize)
        };
        T::from_bytes_unchecked(bytes)
    }

    /// Returns an iterator over every element of this array, ignoring the validity.
    pub fn values_iter(&self) -> MutableBinaryViewValueIter<T> {
        MutableBinaryViewValueIter::new(self)
    }

    pub fn extend_from_array(&mut self, other: &BinaryViewArrayGeneric<T>) {
        let slf_len = self.len();
        match (&mut self.validity, other.validity()) {
            (None, None) => {},
            (Some(v), None) => v.extend_constant(other.len(), true),
            (v @ None, Some(other)) => {
                let mut bm = MutableBitmap::with_capacity(slf_len + other.len());
                bm.extend_constant(slf_len, true);
                bm.extend_from_bitmap(other);
                *v = Some(bm);
            },
            (Some(slf), Some(other)) => slf.extend_from_bitmap(other),
        }

        if other.total_buffer_len() == 0 {
            self.views.extend(other.views().iter().copied());
        } else {
            self.finish_in_progress();

            let buffer_offset = self.completed_buffers().len() as u32;
            self.completed_buffers
                .extend(other.data_buffers().iter().cloned());

            self.views.extend(other.views().iter().map(|view| {
                let mut view = *view;
                if view.length > View::MAX_INLINE_SIZE {
                    view.buffer_idx += buffer_offset;
                }
                view
            }));

            let new_total_buffer_len = self.total_buffer_len() + other.total_buffer_len();
            self.total_buffer_len = new_total_buffer_len;
        }

        self.total_bytes_len = self.total_bytes_len() + other.total_bytes_len();
    }
}

impl MutableBinaryViewArray<[u8]> {
    pub fn validate_utf8(&mut self, buffer_offset: usize, views_offset: usize) -> PolarsResult<()> {
        // Finish the in-progress buffer, as it might be required for validation.
        let pushed = self.finish_in_progress();
        // The views are assumed correct; only UTF-8 validity is checked here.
        unsafe {
            validate_views_utf8_only(
                &self.views[views_offset..],
                &self.completed_buffers,
                buffer_offset,
            )?
        }
        // Restore the in-progress buffer, as we don't want to end up with too-small buffers.
        if pushed {
            if let Some(last) = self.completed_buffers.pop() {
                self.in_progress_buffer = last.into_mut().right().unwrap();
            }
        }
        Ok(())
    }

    /// Extend from a `buffer` and an iterator of item lengths, given some statistics about the
    /// lengths.
    ///
    /// This will attempt to dispatch to several optimized implementations.
    ///
    /// # Safety
    ///
    /// This is safe if the statistics are correct.
    pub unsafe fn extend_from_lengths_with_stats(
        &mut self,
        buffer: &[u8],
        lengths_iterator: impl Clone + ExactSizeIterator<Item = usize>,
        min_length: usize,
        max_length: usize,
        sum_length: usize,
    ) {
        let num_items = lengths_iterator.len();

        if num_items == 0 {
            return;
        }

        #[cfg(debug_assertions)]
        {
            let (min, max, sum) = lengths_iterator.clone().map(|v| (v, v, v)).fold(
                (usize::MAX, usize::MIN, 0usize),
                |(cmin, cmax, csum), (emin, emax, esum)| {
                    (cmin.min(emin), cmax.max(emax), csum + esum)
                },
            );

            assert_eq!(min, min_length);
            assert_eq!(max, max_length);
            assert_eq!(sum, sum_length);
        }

        assert!(sum_length <= buffer.len());

        let mut buffer_offset = 0;
        if min_length > View::MAX_INLINE_SIZE as usize
            && (num_items == 1 || sum_length + self.in_progress_buffer.len() <= u32::MAX as usize)
        {
            let buffer_idx = self.completed_buffers().len() as u32;
            let in_progress_buffer_offset = self.in_progress_buffer.len();

            self.total_bytes_len += sum_length;
            self.total_buffer_len += sum_length;

            self.in_progress_buffer
                .extend_from_slice(&buffer[..sum_length]);
            self.views.extend(lengths_iterator.map(|length| {
                // SAFETY: We asserted before that the sum of all lengths is smaller or equal to
                // the buffer length.
                let view_buffer =
                    unsafe { buffer.get_unchecked(buffer_offset..buffer_offset + length) };

                // SAFETY: We know that the minimum length > View::MAX_INLINE_SIZE. Therefore, this
                // length is > View::MAX_INLINE_SIZE.
                let view = unsafe {
                    View::new_noninline_unchecked(
                        view_buffer,
                        buffer_idx,
                        (buffer_offset + in_progress_buffer_offset) as u32,
                    )
                };
                buffer_offset += length;
                view
            }));
        } else if max_length <= View::MAX_INLINE_SIZE as usize {
            self.total_bytes_len += sum_length;

            // If the min and max are the same, we can dispatch to the optimized SIMD
            // implementation.
            if min_length == max_length {
                let length = min_length;
                if length == 0 {
                    self.views
                        .resize(self.views.len() + num_items, View::new_inline(&[]));
                } else {
                    View::extend_with_inlinable_strided(
                        &mut self.views,
                        &buffer[..length * num_items],
                        length as u8,
                    );
                }
            } else {
                self.views.extend(lengths_iterator.map(|length| {
                    // SAFETY: We asserted before that the sum of all lengths is smaller or equal
                    // to the buffer length.
                    let view_buffer =
                        unsafe { buffer.get_unchecked(buffer_offset..buffer_offset + length) };

                    // SAFETY: We know that each view has a length <= View::MAX_INLINE_SIZE because
                    // the maximum length is <= View::MAX_INLINE_SIZE
                    let view = unsafe { View::new_inline_unchecked(view_buffer) };

                    buffer_offset += length;

                    view
                }));
            }
        } else {
            // If all else fails, just fall back to the base implementation.
            self.reserve(num_items);
            for length in lengths_iterator {
                let value = &buffer[buffer_offset..buffer_offset + length];
                buffer_offset += length;
                self.push_value(value);
            }
        }
    }

    /// Extend from a `buffer` and an iterator of item lengths.
    ///
    /// This will attempt to dispatch to several optimized implementations.
    #[inline]
    pub fn extend_from_lengths(
        &mut self,
        buffer: &[u8],
        lengths_iterator: impl Clone + ExactSizeIterator<Item = usize>,
    ) {
        let (min, max, sum) = lengths_iterator.clone().map(|v| (v, v, v)).fold(
            (usize::MAX, usize::MIN, 0usize),
            |(cmin, cmax, csum), (emin, emax, esum)| (cmin.min(emin), cmax.max(emax), csum + esum),
        );

        // SAFETY: We just collected the right stats.
        unsafe { self.extend_from_lengths_with_stats(buffer, lengths_iterator, min, max, sum) }
    }
}

impl<T: ViewType + ?Sized, P: AsRef<T>> Extend<Option<P>> for MutableBinaryViewArray<T> {
    #[inline]
    fn extend<I: IntoIterator<Item = Option<P>>>(&mut self, iter: I) {
        Self::extend(self, iter.into_iter())
    }
}

impl<T: ViewType + ?Sized, P: AsRef<T>> FromIterator<Option<P>> for MutableBinaryViewArray<T> {
    #[inline]
    fn from_iter<I: IntoIterator<Item = Option<P>>>(iter: I) -> Self {
        Self::from_iterator(iter.into_iter())
    }
}

impl<T: ViewType + ?Sized> MutableArray for MutableBinaryViewArray<T> {
    fn dtype(&self) -> &ArrowDataType {
        T::dtype()
    }

    fn len(&self) -> usize {
        MutableBinaryViewArray::len(self)
    }

    fn validity(&self) -> Option<&MutableBitmap> {
        self.validity.as_ref()
    }

    fn as_box(&mut self) -> Box<dyn Array> {
        let mutable = std::mem::take(self);
        let arr: BinaryViewArrayGeneric<T> = mutable.into();
        arr.boxed()
    }

    fn as_any(&self) -> &dyn Any {
        self
    }

    fn as_mut_any(&mut self) -> &mut dyn Any {
        self
    }

    fn push_null(&mut self) {
        MutableBinaryViewArray::push_null(self)
    }

    fn reserve(&mut self, additional: usize) {
        MutableBinaryViewArray::reserve(self, additional)
    }

    fn shrink_to_fit(&mut self) {
        self.views.shrink_to_fit()
    }
}

impl<T: ViewType + ?Sized, P: AsRef<T>> TryExtend<Option<P>> for MutableBinaryViewArray<T> {
    /// This is infallible and is implemented for consistency with all other types
    #[inline]
    fn try_extend<I: IntoIterator<Item = Option<P>>>(&mut self, iter: I) -> PolarsResult<()> {
        self.extend(iter.into_iter());
        Ok(())
    }
}

impl<T: ViewType + ?Sized, P: AsRef<T>> TryPush<Option<P>> for MutableBinaryViewArray<T> {
    /// This is infallible and is implemented for consistency with all other types
    #[inline(always)]
    fn try_push(&mut self, item: Option<P>) -> PolarsResult<()> {
        self.push(item.as_ref().map(|p| p.as_ref()));
        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    fn roundtrip(values: &[&[u8]]) -> bool {
        let buffer = values
            .iter()
            .flat_map(|v| v.iter().copied())
            .collect::<Vec<u8>>();
        let lengths = values.iter().map(|v| v.len()).collect::<Vec<usize>>();
        let mut bv = MutableBinaryViewArray::<[u8]>::with_capacity(values.len());

        bv.extend_from_lengths(&buffer[..], lengths.into_iter());

        &bv.values_iter().collect::<Vec<&[u8]>>()[..] == values
    }

    #[test]
    fn extend_with_lengths_basic() {
        assert!(roundtrip(&[]));
        assert!(roundtrip(&[b"abc"]));
        assert!(roundtrip(&[
            b"a_very_very_long_string_that_is_not_inlinable"
        ]));
        assert!(roundtrip(&[
            b"abc",
            b"a_very_very_long_string_that_is_not_inlinable"
        ]));
    }

    #[test]
    fn extend_with_inlinable_fastpath() {
        assert!(roundtrip(&[b"abc", b"defg", b"hix"]));
        assert!(roundtrip(&[b"abc", b"defg", b"hix", b"xyza1234abcd"]));
    }

    #[test]
    fn extend_with_inlinable_eq_len_fastpath() {
        assert!(roundtrip(&[b"abc", b"def", b"hix"]));
        assert!(roundtrip(&[b"abc", b"def", b"hix", b"xyz"]));
    }

    #[test]
    fn extend_with_not_inlinable_fastpath() {
        assert!(roundtrip(&[
            b"a_very_long_string123",
            b"a_longer_string_than_the_previous"
        ]));
    }
901}