vortex_array/arrays/varbin/
builder.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use num_traits::AsPrimitive;
5use vortex_buffer::BitBufferMut;
6use vortex_buffer::BufferMut;
7use vortex_dtype::DType;
8use vortex_dtype::IntegerPType;
9use vortex_error::vortex_panic;
10
11use crate::IntoArray;
12use crate::arrays::primitive::PrimitiveArray;
13use crate::arrays::varbin::VarBinArray;
14use crate::validity::Validity;
15
16pub struct VarBinBuilder<O: IntegerPType> {
17    offsets: BufferMut<O>,
18    data: BufferMut<u8>,
19    validity: BitBufferMut,
20}
21
22impl<O: IntegerPType> Default for VarBinBuilder<O> {
23    fn default() -> Self {
24        Self::new()
25    }
26}
27
28impl<O: IntegerPType> VarBinBuilder<O> {
29    pub fn new() -> Self {
30        Self::with_capacity(0)
31    }
32
33    pub fn with_capacity(len: usize) -> Self {
34        let mut offsets = BufferMut::with_capacity(len + 1);
35        offsets.push(O::zero());
36        Self {
37            offsets,
38            data: BufferMut::empty(),
39            validity: BitBufferMut::with_capacity(len),
40        }
41    }
42
43    #[inline]
44    pub fn append(&mut self, value: Option<&[u8]>) {
45        match value {
46            Some(v) => self.append_value(v),
47            None => self.append_null(),
48        }
49    }
50
51    #[inline]
52    pub fn append_value(&mut self, value: impl AsRef<[u8]>) {
53        let slice = value.as_ref();
54        self.offsets
55            .push(O::from(self.data.len() + slice.len()).unwrap_or_else(|| {
56                vortex_panic!(
57                    "Failed to convert sum of {} and {} to offset of type {}",
58                    self.data.len(),
59                    slice.len(),
60                    std::any::type_name::<O>()
61                )
62            }));
63        self.data.extend_from_slice(slice);
64        self.validity.append_true();
65    }
66
67    #[inline]
68    pub fn append_null(&mut self) {
69        self.offsets.push(self.offsets[self.offsets.len() - 1]);
70        self.validity.append_false();
71    }
72
73    #[inline]
74    pub fn append_n_nulls(&mut self, n: usize) {
75        self.offsets.push_n(self.offsets[self.offsets.len() - 1], n);
76        self.validity.append_n(false, n);
77    }
78
79    #[inline]
80    pub fn append_values(&mut self, values: &[u8], end_offsets: impl Iterator<Item = O>, num: usize)
81    where
82        O: 'static,
83        usize: AsPrimitive<O>,
84    {
85        self.offsets
86            .extend(end_offsets.map(|offset| offset + self.data.len().as_()));
87        self.data.extend_from_slice(values);
88        self.validity.append_n(true, num);
89    }
90
91    pub fn finish(self, dtype: DType) -> VarBinArray {
92        let offsets = PrimitiveArray::new(self.offsets.freeze(), Validity::NonNullable);
93        let nulls = self.validity.freeze();
94
95        let validity = Validity::from_bit_buffer(nulls, dtype.nullability());
96
97        // SAFETY: The builder maintains all invariants:
98        // - Offsets are monotonically increasing starting from 0 (guaranteed by builder logic).
99        // - Bytes buffer contains exactly the data referenced by offsets.
100        // - Validity matches the dtype nullability.
101        // - UTF-8 validity is ensured by the caller when using DType::Utf8.
102        unsafe {
103            VarBinArray::new_unchecked(offsets.into_array(), self.data.freeze(), dtype, validity)
104        }
105    }
106}
107
#[cfg(test)]
mod tests {
    use vortex_dtype::DType;
    use vortex_dtype::Nullability::Nullable;
    use vortex_scalar::Scalar;

    use crate::arrays::varbin::builder::VarBinBuilder;

    /// Builds `["hello", null, "world"]` and checks length, nullability, and every element.
    #[test]
    fn test_builder() {
        let mut builder = VarBinBuilder::<i32>::with_capacity(0);
        builder.append(Some(b"hello"));
        builder.append(None);
        builder.append(Some(b"world"));
        let array = builder.finish(DType::Utf8(Nullable));

        assert_eq!(array.len(), 3);
        assert_eq!(array.dtype().nullability(), Nullable);
        assert_eq!(
            array.scalar_at(0),
            Scalar::utf8("hello".to_string(), Nullable)
        );
        assert!(array.scalar_at(1).is_null());
        // The value after the null starts at the offset the null repeated, so checking it
        // confirms a null does not shift or swallow the bytes that follow.
        assert_eq!(
            array.scalar_at(2),
            Scalar::utf8("world".to_string(), Nullable)
        );
    }
}