arrow2/array/growable/
utf8.rs

1use std::sync::Arc;
2
3use crate::{
4    array::{Array, Utf8Array},
5    bitmap::MutableBitmap,
6    offset::{Offset, Offsets},
7};
8
9use super::{
10    utils::{build_extend_null_bits, extend_offset_values, ExtendNullBits},
11    Growable,
12};
13
14/// Concrete [`Growable`] for the [`Utf8Array`].
15pub struct GrowableUtf8<'a, O: Offset> {
16    arrays: Vec<&'a Utf8Array<O>>,
17    validity: MutableBitmap,
18    values: Vec<u8>,
19    offsets: Offsets<O>,
20    extend_null_bits: Vec<ExtendNullBits<'a>>,
21}
22
23impl<'a, O: Offset> GrowableUtf8<'a, O> {
24    /// Creates a new [`GrowableUtf8`] bound to `arrays` with a pre-allocated `capacity`.
25    /// # Panics
26    /// If `arrays` is empty.
27    pub fn new(arrays: Vec<&'a Utf8Array<O>>, mut use_validity: bool, capacity: usize) -> Self {
28        // if any of the arrays has nulls, insertions from any array requires setting bits
29        // as there is at least one array with nulls.
30        if arrays.iter().any(|array| array.null_count() > 0) {
31            use_validity = true;
32        };
33
34        let extend_null_bits = arrays
35            .iter()
36            .map(|array| build_extend_null_bits(*array, use_validity))
37            .collect();
38
39        Self {
40            arrays: arrays.to_vec(),
41            values: Vec::with_capacity(0),
42            offsets: Offsets::with_capacity(capacity),
43            validity: MutableBitmap::with_capacity(capacity),
44            extend_null_bits,
45        }
46    }
47
48    fn to(&mut self) -> Utf8Array<O> {
49        let validity = std::mem::take(&mut self.validity);
50        let offsets = std::mem::take(&mut self.offsets);
51        let values = std::mem::take(&mut self.values);
52
53        #[cfg(debug_assertions)]
54        {
55            crate::array::specification::try_check_utf8(&offsets, &values).unwrap();
56        }
57
58        unsafe {
59            Utf8Array::<O>::try_new_unchecked(
60                self.arrays[0].data_type().clone(),
61                offsets.into(),
62                values.into(),
63                validity.into(),
64            )
65            .unwrap()
66        }
67    }
68}
69
70impl<'a, O: Offset> Growable<'a> for GrowableUtf8<'a, O> {
71    fn extend(&mut self, index: usize, start: usize, len: usize) {
72        (self.extend_null_bits[index])(&mut self.validity, start, len);
73
74        let array = self.arrays[index];
75        let offsets = array.offsets();
76        let values = array.values();
77
78        self.offsets
79            .try_extend_from_slice(offsets, start, len)
80            .unwrap();
81
82        // values
83        extend_offset_values::<O>(&mut self.values, offsets.as_slice(), values, start, len);
84    }
85
86    fn extend_validity(&mut self, additional: usize) {
87        self.offsets.extend_constant(additional);
88        self.validity.extend_constant(additional, false);
89    }
90
91    #[inline]
92    fn len(&self) -> usize {
93        self.offsets.len() - 1
94    }
95
96    fn as_arc(&mut self) -> Arc<dyn Array> {
97        Arc::new(self.to())
98    }
99
100    fn as_box(&mut self) -> Box<dyn Array> {
101        Box::new(self.to())
102    }
103}
104
105impl<'a, O: Offset> From<GrowableUtf8<'a, O>> for Utf8Array<O> {
106    fn from(mut val: GrowableUtf8<'a, O>) -> Self {
107        val.to()
108    }
109}