vortex_array/arrays/varbin/
builder.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use arrow_buffer::NullBufferBuilder;
5use num_traits::{AsPrimitive, PrimInt};
6use vortex_buffer::BufferMut;
7use vortex_dtype::{DType, NativePType};
8use vortex_error::{VortexExpect as _, vortex_panic};
9
10use crate::IntoArray;
11use crate::arrays::primitive::PrimitiveArray;
12use crate::arrays::varbin::VarBinArray;
13use crate::validity::Validity;
14
15pub struct VarBinBuilder<O: NativePType> {
16    offsets: BufferMut<O>,
17    data: BufferMut<u8>,
18    validity: NullBufferBuilder,
19}
20
21impl<O: NativePType + PrimInt> Default for VarBinBuilder<O> {
22    fn default() -> Self {
23        Self::new()
24    }
25}
26
27impl<O: NativePType + PrimInt> VarBinBuilder<O> {
28    pub fn new() -> Self {
29        Self::with_capacity(0)
30    }
31
32    pub fn with_capacity(len: usize) -> Self {
33        let mut offsets = BufferMut::with_capacity(len + 1);
34        offsets.push(O::zero());
35        Self {
36            offsets,
37            data: BufferMut::empty(),
38            validity: NullBufferBuilder::new(len),
39        }
40    }
41
42    #[inline]
43    pub fn append(&mut self, value: Option<&[u8]>) {
44        match value {
45            Some(v) => self.append_value(v),
46            None => self.append_null(),
47        }
48    }
49
50    #[inline]
51    pub fn append_value(&mut self, value: impl AsRef<[u8]>) {
52        let slice = value.as_ref();
53        self.offsets
54            .push(O::from(self.data.len() + slice.len()).unwrap_or_else(|| {
55                vortex_panic!(
56                    "Failed to convert sum of {} and {} to offset of type {}",
57                    self.data.len(),
58                    slice.len(),
59                    std::any::type_name::<O>()
60                )
61            }));
62        self.data.extend_from_slice(slice);
63        self.validity.append_non_null();
64    }
65
66    #[inline]
67    pub fn append_null(&mut self) {
68        self.offsets.push(self.offsets[self.offsets.len() - 1]);
69        self.validity.append_null();
70    }
71
72    #[inline]
73    pub fn append_n_nulls(&mut self, n: usize) {
74        self.offsets.push_n(self.offsets[self.offsets.len() - 1], n);
75        self.validity.append_n_nulls(n);
76    }
77
78    #[inline]
79    pub fn append_values(&mut self, values: &[u8], end_offsets: impl Iterator<Item = O>, num: usize)
80    where
81        O: 'static,
82        usize: AsPrimitive<O>,
83    {
84        self.offsets
85            .extend(end_offsets.map(|offset| offset + self.data.len().as_()));
86        self.data.extend_from_slice(values);
87        self.validity.append_n_non_nulls(num);
88    }
89
90    pub fn finish(mut self, dtype: DType) -> VarBinArray {
91        let offsets = PrimitiveArray::new(self.offsets.freeze(), Validity::NonNullable);
92        let nulls = self.validity.finish();
93
94        let validity = if dtype.is_nullable() {
95            nulls.map(Validity::from).unwrap_or(Validity::AllValid)
96        } else {
97            assert!(nulls.is_none(), "dtype and validity mismatch");
98            Validity::NonNullable
99        };
100
101        VarBinArray::try_new(offsets.into_array(), self.data.freeze(), dtype, validity)
102            .vortex_expect("Unexpected error while building VarBinArray")
103    }
104}
105
#[cfg(test)]
mod test {
    use vortex_dtype::DType;
    use vortex_dtype::Nullability::Nullable;
    use vortex_scalar::Scalar;

    use crate::arrays::varbin::builder::VarBinBuilder;

    /// Appending values around a null yields the right length, nullability and contents.
    #[test]
    fn test_builder() {
        let mut builder = VarBinBuilder::<i32>::with_capacity(0);
        builder.append(Some(b"hello"));
        builder.append(None);
        builder.append(Some(b"world"));
        let array = builder.finish(DType::Utf8(Nullable));

        assert_eq!(array.len(), 3);
        assert_eq!(array.dtype().nullability(), Nullable);
        assert_eq!(
            array.scalar_at(0).unwrap(),
            Scalar::utf8("hello".to_string(), Nullable)
        );
        assert!(array.scalar_at(1).unwrap().is_null());
        // The value after the null checks that the null's repeated offset
        // did not shift the bytes of later elements.
        assert_eq!(
            array.scalar_at(2).unwrap(),
            Scalar::utf8("world".to_string(), Nullable)
        );
    }

    /// Bulk-appended offsets are shifted by the bytes already in the builder.
    #[test]
    fn test_append_values() {
        let mut builder = VarBinBuilder::<i32>::with_capacity(0);
        builder.append_value(b"hello");
        // "foo" ends at 3 and "bar" at 6, both relative to the start of the batch.
        builder.append_values(b"foobar", [3i32, 6].into_iter(), 2);
        let array = builder.finish(DType::Utf8(Nullable));

        assert_eq!(array.len(), 3);
        assert_eq!(
            array.scalar_at(0).unwrap(),
            Scalar::utf8("hello".to_string(), Nullable)
        );
        assert_eq!(
            array.scalar_at(1).unwrap(),
            Scalar::utf8("foo".to_string(), Nullable)
        );
        assert_eq!(
            array.scalar_at(2).unwrap(),
            Scalar::utf8("bar".to_string(), Nullable)
        );
    }
}