vortex_array/arrays/varbin/
builder.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use num_traits::AsPrimitive;
5use vortex_buffer::{BitBufferMut, BufferMut};
6use vortex_dtype::{DType, IntegerPType};
7use vortex_error::vortex_panic;
8
9use crate::IntoArray;
10use crate::arrays::primitive::PrimitiveArray;
11use crate::arrays::varbin::VarBinArray;
12use crate::validity::Validity;
13
/// Incremental builder that accumulates variable-length byte values and produces a
/// [`VarBinArray`] via [`VarBinBuilder::finish`].
///
/// `O` is the integer type used to store offsets into the value bytes.
pub struct VarBinBuilder<O: IntegerPType> {
    /// Offsets into `data`. Seeded with a single zero on construction; every appended
    /// element pushes one more entry, so this holds one more entry than there are elements.
    offsets: BufferMut<O>,
    /// Bytes of all appended values, stored back to back.
    data: BufferMut<u8>,
    /// One bit per appended element: `true` for a value, `false` for a null.
    validity: BitBufferMut,
}
19
20impl<O: IntegerPType> Default for VarBinBuilder<O> {
21    fn default() -> Self {
22        Self::new()
23    }
24}
25
26impl<O: IntegerPType> VarBinBuilder<O> {
27    pub fn new() -> Self {
28        Self::with_capacity(0)
29    }
30
31    pub fn with_capacity(len: usize) -> Self {
32        let mut offsets = BufferMut::with_capacity(len + 1);
33        offsets.push(O::zero());
34        Self {
35            offsets,
36            data: BufferMut::empty(),
37            validity: BitBufferMut::with_capacity(len),
38        }
39    }
40
41    #[inline]
42    pub fn append(&mut self, value: Option<&[u8]>) {
43        match value {
44            Some(v) => self.append_value(v),
45            None => self.append_null(),
46        }
47    }
48
49    #[inline]
50    pub fn append_value(&mut self, value: impl AsRef<[u8]>) {
51        let slice = value.as_ref();
52        self.offsets
53            .push(O::from(self.data.len() + slice.len()).unwrap_or_else(|| {
54                vortex_panic!(
55                    "Failed to convert sum of {} and {} to offset of type {}",
56                    self.data.len(),
57                    slice.len(),
58                    std::any::type_name::<O>()
59                )
60            }));
61        self.data.extend_from_slice(slice);
62        self.validity.append_true();
63    }
64
65    #[inline]
66    pub fn append_null(&mut self) {
67        self.offsets.push(self.offsets[self.offsets.len() - 1]);
68        self.validity.append_false();
69    }
70
71    #[inline]
72    pub fn append_n_nulls(&mut self, n: usize) {
73        self.offsets.push_n(self.offsets[self.offsets.len() - 1], n);
74        self.validity.append_n(false, n);
75    }
76
77    #[inline]
78    pub fn append_values(&mut self, values: &[u8], end_offsets: impl Iterator<Item = O>, num: usize)
79    where
80        O: 'static,
81        usize: AsPrimitive<O>,
82    {
83        self.offsets
84            .extend(end_offsets.map(|offset| offset + self.data.len().as_()));
85        self.data.extend_from_slice(values);
86        self.validity.append_n(true, num);
87    }
88
89    pub fn finish(self, dtype: DType) -> VarBinArray {
90        let offsets = PrimitiveArray::new(self.offsets.freeze(), Validity::NonNullable);
91        let nulls = self.validity.freeze();
92
93        let validity = Validity::from_bit_buffer(nulls, dtype.nullability());
94
95        // SAFETY: The builder maintains all invariants:
96        // - Offsets are monotonically increasing starting from 0 (guaranteed by builder logic).
97        // - Bytes buffer contains exactly the data referenced by offsets.
98        // - Validity matches the dtype nullability.
99        // - UTF-8 validity is ensured by the caller when using DType::Utf8.
100        unsafe {
101            VarBinArray::new_unchecked(offsets.into_array(), self.data.freeze(), dtype, validity)
102        }
103    }
104}
105
#[cfg(test)]
mod tests {
    use vortex_dtype::DType;
    use vortex_dtype::Nullability::Nullable;
    use vortex_scalar::Scalar;

    use crate::arrays::varbin::builder::VarBinBuilder;

    /// Element-by-element appends: value, null, value.
    #[test]
    fn test_builder() {
        let mut builder = VarBinBuilder::<i32>::with_capacity(0);
        builder.append(Some(b"hello"));
        builder.append(None);
        builder.append(Some(b"world"));
        let array = builder.finish(DType::Utf8(Nullable));

        assert_eq!(array.len(), 3);
        assert_eq!(array.dtype().nullability(), Nullable);
        assert_eq!(
            array.scalar_at(0),
            Scalar::utf8("hello".to_string(), Nullable)
        );
        assert!(array.scalar_at(1).is_null());
        // The value after the null must start where the previous value ended.
        assert_eq!(
            array.scalar_at(2),
            Scalar::utf8("world".to_string(), Nullable)
        );
    }

    /// Bulk appends: offsets passed to `append_values` are relative to the new bytes and
    /// must be rebased onto the bytes already held by the builder.
    #[test]
    fn test_builder_bulk_append() {
        let mut builder = VarBinBuilder::<i32>::new();
        builder.append_value(b"ab");
        builder.append_n_nulls(2);
        builder.append_values(b"helloworld", [5i32, 10].into_iter(), 2);
        let array = builder.finish(DType::Utf8(Nullable));

        assert_eq!(array.len(), 5);
        assert_eq!(array.scalar_at(0), Scalar::utf8("ab".to_string(), Nullable));
        assert!(array.scalar_at(1).is_null());
        assert!(array.scalar_at(2).is_null());
        assert_eq!(
            array.scalar_at(3),
            Scalar::utf8("hello".to_string(), Nullable)
        );
        assert_eq!(
            array.scalar_at(4),
            Scalar::utf8("world".to_string(), Nullable)
        );
    }
}