arrow2/array/binary/
mutable_values.rs

1use std::{iter::FromIterator, sync::Arc};
2
3use crate::{
4    array::{
5        specification::try_check_offsets_bounds, Array, ArrayAccessor, ArrayValuesIter,
6        MutableArray, TryExtend, TryExtendFromSelf, TryPush,
7    },
8    bitmap::MutableBitmap,
9    datatypes::DataType,
10    error::{Error, Result},
11    offset::{Offset, Offsets},
12    trusted_len::TrustedLen,
13};
14
15use super::{BinaryArray, MutableBinaryArray};
16use crate::array::physical_binary::*;
17
18/// A [`MutableArray`] that builds a [`BinaryArray`]. It differs
19/// from [`MutableBinaryArray`] in that it builds non-null [`BinaryArray`].
20#[derive(Debug, Clone)]
21pub struct MutableBinaryValuesArray<O: Offset> {
22    data_type: DataType,
23    offsets: Offsets<O>,
24    values: Vec<u8>,
25}
26
27impl<O: Offset> From<MutableBinaryValuesArray<O>> for BinaryArray<O> {
28    fn from(other: MutableBinaryValuesArray<O>) -> Self {
29        BinaryArray::<O>::new(
30            other.data_type,
31            other.offsets.into(),
32            other.values.into(),
33            None,
34        )
35    }
36}
37
38impl<O: Offset> From<MutableBinaryValuesArray<O>> for MutableBinaryArray<O> {
39    fn from(other: MutableBinaryValuesArray<O>) -> Self {
40        MutableBinaryArray::<O>::try_new(other.data_type, other.offsets, other.values, None)
41            .expect("MutableBinaryValuesArray is consistent with MutableBinaryArray")
42    }
43}
44
45impl<O: Offset> Default for MutableBinaryValuesArray<O> {
46    fn default() -> Self {
47        Self::new()
48    }
49}
50
51impl<O: Offset> MutableBinaryValuesArray<O> {
52    /// Returns an empty [`MutableBinaryValuesArray`].
53    pub fn new() -> Self {
54        Self {
55            data_type: Self::default_data_type(),
56            offsets: Offsets::new(),
57            values: Vec::<u8>::new(),
58        }
59    }
60
61    /// Returns a [`MutableBinaryValuesArray`] created from its internal representation.
62    ///
63    /// # Errors
64    /// This function returns an error iff:
65    /// * The last offset is not equal to the values' length.
66    /// * The `data_type`'s [`crate::datatypes::PhysicalType`] is not equal to either `Binary` or `LargeBinary`.
67    /// # Implementation
68    /// This function is `O(1)`
69    pub fn try_new(data_type: DataType, offsets: Offsets<O>, values: Vec<u8>) -> Result<Self> {
70        try_check_offsets_bounds(&offsets, values.len())?;
71
72        if data_type.to_physical_type() != Self::default_data_type().to_physical_type() {
73            return Err(Error::oos(
74                "MutableBinaryValuesArray can only be initialized with DataType::Binary or DataType::LargeBinary",
75            ));
76        }
77
78        Ok(Self {
79            data_type,
80            offsets,
81            values,
82        })
83    }
84
85    /// Returns the default [`DataType`] of this container: [`DataType::Utf8`] or [`DataType::LargeUtf8`]
86    /// depending on the generic [`Offset`].
87    pub fn default_data_type() -> DataType {
88        BinaryArray::<O>::default_data_type()
89    }
90
91    /// Initializes a new [`MutableBinaryValuesArray`] with a pre-allocated capacity of items.
92    pub fn with_capacity(capacity: usize) -> Self {
93        Self::with_capacities(capacity, 0)
94    }
95
96    /// Initializes a new [`MutableBinaryValuesArray`] with a pre-allocated capacity of items and values.
97    pub fn with_capacities(capacity: usize, values: usize) -> Self {
98        Self {
99            data_type: Self::default_data_type(),
100            offsets: Offsets::<O>::with_capacity(capacity),
101            values: Vec::<u8>::with_capacity(values),
102        }
103    }
104
105    /// returns its values.
106    #[inline]
107    pub fn values(&self) -> &Vec<u8> {
108        &self.values
109    }
110
111    /// returns its offsets.
112    #[inline]
113    pub fn offsets(&self) -> &Offsets<O> {
114        &self.offsets
115    }
116
117    /// Reserves `additional` elements and `additional_values` on the values.
118    #[inline]
119    pub fn reserve(&mut self, additional: usize, additional_values: usize) {
120        self.offsets.reserve(additional);
121        self.values.reserve(additional_values);
122    }
123
124    /// Returns the capacity in number of items
125    pub fn capacity(&self) -> usize {
126        self.offsets.capacity()
127    }
128
129    /// Returns the length of this array
130    #[inline]
131    pub fn len(&self) -> usize {
132        self.offsets.len_proxy()
133    }
134
135    /// Pushes a new item to the array.
136    /// # Panic
137    /// This operation panics iff the length of all values (in bytes) exceeds `O` maximum value.
138    #[inline]
139    pub fn push<T: AsRef<[u8]>>(&mut self, value: T) {
140        self.try_push(value).unwrap()
141    }
142
143    /// Pop the last entry from [`MutableBinaryValuesArray`].
144    /// This function returns `None` iff this array is empty.
145    pub fn pop(&mut self) -> Option<Vec<u8>> {
146        if self.len() == 0 {
147            return None;
148        }
149        self.offsets.pop()?;
150        let start = self.offsets.last().to_usize();
151        let value = self.values.split_off(start);
152        Some(value.to_vec())
153    }
154
155    /// Returns the value of the element at index `i`.
156    /// # Panic
157    /// This function panics iff `i >= self.len`.
158    #[inline]
159    pub fn value(&self, i: usize) -> &[u8] {
160        assert!(i < self.len());
161        unsafe { self.value_unchecked(i) }
162    }
163
164    /// Returns the value of the element at index `i`.
165    /// # Safety
166    /// This function is safe iff `i < self.len`.
167    #[inline]
168    pub unsafe fn value_unchecked(&self, i: usize) -> &[u8] {
169        // soundness: the invariant of the function
170        let (start, end) = self.offsets.start_end(i);
171
172        // soundness: the invariant of the struct
173        self.values.get_unchecked(start..end)
174    }
175
176    /// Returns an iterator of `&[u8]`
177    pub fn iter(&self) -> ArrayValuesIter<Self> {
178        ArrayValuesIter::new(self)
179    }
180
181    /// Shrinks the capacity of the [`MutableBinaryValuesArray`] to fit its current length.
182    pub fn shrink_to_fit(&mut self) {
183        self.values.shrink_to_fit();
184        self.offsets.shrink_to_fit();
185    }
186
187    /// Extract the low-end APIs from the [`MutableBinaryValuesArray`].
188    pub fn into_inner(self) -> (DataType, Offsets<O>, Vec<u8>) {
189        (self.data_type, self.offsets, self.values)
190    }
191}
192
193impl<O: Offset> MutableArray for MutableBinaryValuesArray<O> {
194    fn len(&self) -> usize {
195        self.len()
196    }
197
198    fn validity(&self) -> Option<&MutableBitmap> {
199        None
200    }
201
202    fn as_box(&mut self) -> Box<dyn Array> {
203        let (data_type, offsets, values) = std::mem::take(self).into_inner();
204        BinaryArray::new(data_type, offsets.into(), values.into(), None).boxed()
205    }
206
207    fn as_arc(&mut self) -> Arc<dyn Array> {
208        let (data_type, offsets, values) = std::mem::take(self).into_inner();
209        BinaryArray::new(data_type, offsets.into(), values.into(), None).arced()
210    }
211
212    fn data_type(&self) -> &DataType {
213        &self.data_type
214    }
215
216    fn as_any(&self) -> &dyn std::any::Any {
217        self
218    }
219
220    fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
221        self
222    }
223
224    #[inline]
225    fn push_null(&mut self) {
226        self.push::<&[u8]>(b"")
227    }
228
229    fn reserve(&mut self, additional: usize) {
230        self.reserve(additional, 0)
231    }
232
233    fn shrink_to_fit(&mut self) {
234        self.shrink_to_fit()
235    }
236}
237
238impl<O: Offset, P: AsRef<[u8]>> FromIterator<P> for MutableBinaryValuesArray<O> {
239    fn from_iter<I: IntoIterator<Item = P>>(iter: I) -> Self {
240        let (offsets, values) = values_iter(iter.into_iter());
241        Self::try_new(Self::default_data_type(), offsets, values).unwrap()
242    }
243}
244
245impl<O: Offset> MutableBinaryValuesArray<O> {
246    pub(crate) unsafe fn extend_from_trusted_len_iter<I, P>(
247        &mut self,
248        validity: &mut MutableBitmap,
249        iterator: I,
250    ) where
251        P: AsRef<[u8]>,
252        I: Iterator<Item = Option<P>>,
253    {
254        extend_from_trusted_len_iter(&mut self.offsets, &mut self.values, validity, iterator);
255    }
256
257    /// Extends the [`MutableBinaryValuesArray`] from a [`TrustedLen`]
258    #[inline]
259    pub fn extend_trusted_len<I, P>(&mut self, iterator: I)
260    where
261        P: AsRef<[u8]>,
262        I: TrustedLen<Item = P>,
263    {
264        unsafe { self.extend_trusted_len_unchecked(iterator) }
265    }
266
267    /// Extends [`MutableBinaryValuesArray`] from an iterator of trusted len.
268    /// # Safety
269    /// The iterator must be trusted len.
270    #[inline]
271    pub unsafe fn extend_trusted_len_unchecked<I, P>(&mut self, iterator: I)
272    where
273        P: AsRef<[u8]>,
274        I: Iterator<Item = P>,
275    {
276        extend_from_trusted_len_values_iter(&mut self.offsets, &mut self.values, iterator);
277    }
278
279    /// Creates a [`MutableBinaryValuesArray`] from a [`TrustedLen`]
280    #[inline]
281    pub fn from_trusted_len_iter<I, P>(iterator: I) -> Self
282    where
283        P: AsRef<[u8]>,
284        I: TrustedLen<Item = P>,
285    {
286        // soundness: I is `TrustedLen`
287        unsafe { Self::from_trusted_len_iter_unchecked(iterator) }
288    }
289
290    /// Returns a new [`MutableBinaryValuesArray`] from an iterator of trusted length.
291    /// # Safety
292    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
293    /// I.e. that `size_hint().1` correctly reports its length.
294    #[inline]
295    pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Self
296    where
297        P: AsRef<[u8]>,
298        I: Iterator<Item = P>,
299    {
300        let (offsets, values) = trusted_len_values_iter(iterator);
301        Self::try_new(Self::default_data_type(), offsets, values).unwrap()
302    }
303
304    /// Returns a new [`MutableBinaryValuesArray`] from an iterator.
305    /// # Error
306    /// This operation errors iff the total length in bytes on the iterator exceeds `O`'s maximum value.
307    /// (`i32::MAX` or `i64::MAX` respectively).
308    pub fn try_from_iter<P: AsRef<[u8]>, I: IntoIterator<Item = P>>(iter: I) -> Result<Self> {
309        let iterator = iter.into_iter();
310        let (lower, _) = iterator.size_hint();
311        let mut array = Self::with_capacity(lower);
312        for item in iterator {
313            array.try_push(item)?;
314        }
315        Ok(array)
316    }
317
318    /// Extend with a fallible iterator
319    pub fn extend_fallible<T, I, E>(&mut self, iter: I) -> std::result::Result<(), E>
320    where
321        E: std::error::Error,
322        I: IntoIterator<Item = std::result::Result<T, E>>,
323        T: AsRef<[u8]>,
324    {
325        let mut iter = iter.into_iter();
326        self.reserve(iter.size_hint().0, 0);
327        iter.try_for_each(|x| {
328            self.push(x?);
329            Ok(())
330        })
331    }
332}
333
334impl<O: Offset, T: AsRef<[u8]>> Extend<T> for MutableBinaryValuesArray<O> {
335    fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
336        extend_from_values_iter(&mut self.offsets, &mut self.values, iter.into_iter());
337    }
338}
339
340impl<O: Offset, T: AsRef<[u8]>> TryExtend<T> for MutableBinaryValuesArray<O> {
341    fn try_extend<I: IntoIterator<Item = T>>(&mut self, iter: I) -> Result<()> {
342        let mut iter = iter.into_iter();
343        self.reserve(iter.size_hint().0, 0);
344        iter.try_for_each(|x| self.try_push(x))
345    }
346}
347
348impl<O: Offset, T: AsRef<[u8]>> TryPush<T> for MutableBinaryValuesArray<O> {
349    #[inline]
350    fn try_push(&mut self, value: T) -> Result<()> {
351        let bytes = value.as_ref();
352        self.values.extend_from_slice(bytes);
353        self.offsets.try_push_usize(bytes.len())
354    }
355}
356
357unsafe impl<'a, O: Offset> ArrayAccessor<'a> for MutableBinaryValuesArray<O> {
358    type Item = &'a [u8];
359
360    #[inline]
361    unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item {
362        self.value_unchecked(index)
363    }
364
365    #[inline]
366    fn len(&self) -> usize {
367        self.len()
368    }
369}
370
371impl<O: Offset> TryExtendFromSelf for MutableBinaryValuesArray<O> {
372    fn try_extend_from_self(&mut self, other: &Self) -> Result<()> {
373        self.values.extend_from_slice(&other.values);
374        self.offsets.try_extend_from_self(&other.offsets)
375    }
376}