vortex_array/arrays/varbin/
builder.rs1use num_traits::AsPrimitive;
5use vortex_buffer::BitBufferMut;
6use vortex_buffer::BufferMut;
7use vortex_dtype::DType;
8use vortex_dtype::IntegerPType;
9use vortex_error::vortex_panic;
10
11use crate::IntoArray;
12use crate::arrays::primitive::PrimitiveArray;
13use crate::arrays::varbin::VarBinArray;
14use crate::expr::stats::Precision;
15use crate::expr::stats::Stat;
16use crate::validity::Validity;
17
18pub struct VarBinBuilder<O: IntegerPType> {
19 offsets: BufferMut<O>,
20 data: BufferMut<u8>,
21 validity: BitBufferMut,
22}
23
24impl<O: IntegerPType> Default for VarBinBuilder<O> {
25 fn default() -> Self {
26 Self::new()
27 }
28}
29
30impl<O: IntegerPType> VarBinBuilder<O> {
31 pub fn new() -> Self {
32 Self::with_capacity(0)
33 }
34
35 pub fn with_capacity(len: usize) -> Self {
36 let mut offsets = BufferMut::with_capacity(len + 1);
37 offsets.push(O::zero());
38 Self {
39 offsets,
40 data: BufferMut::empty(),
41 validity: BitBufferMut::with_capacity(len),
42 }
43 }
44
45 #[inline]
46 pub fn append(&mut self, value: Option<&[u8]>) {
47 match value {
48 Some(v) => self.append_value(v),
49 None => self.append_null(),
50 }
51 }
52
53 #[inline]
54 pub fn append_value(&mut self, value: impl AsRef<[u8]>) {
55 let slice = value.as_ref();
56 self.offsets
57 .push(O::from(self.data.len() + slice.len()).unwrap_or_else(|| {
58 vortex_panic!(
59 "Failed to convert sum of {} and {} to offset of type {}",
60 self.data.len(),
61 slice.len(),
62 std::any::type_name::<O>()
63 )
64 }));
65 self.data.extend_from_slice(slice);
66 self.validity.append_true();
67 }
68
69 #[inline]
70 pub fn append_null(&mut self) {
71 self.offsets.push(self.offsets[self.offsets.len() - 1]);
72 self.validity.append_false();
73 }
74
75 #[inline]
76 pub fn append_n_nulls(&mut self, n: usize) {
77 self.offsets.push_n(self.offsets[self.offsets.len() - 1], n);
78 self.validity.append_n(false, n);
79 }
80
81 #[inline]
82 pub fn append_values(&mut self, values: &[u8], end_offsets: impl Iterator<Item = O>, num: usize)
83 where
84 O: 'static,
85 usize: AsPrimitive<O>,
86 {
87 self.offsets
88 .extend(end_offsets.map(|offset| offset + self.data.len().as_()));
89 self.data.extend_from_slice(values);
90 self.validity.append_n(true, num);
91 }
92
93 pub fn finish(self, dtype: DType) -> VarBinArray {
94 let offsets = PrimitiveArray::new(self.offsets.freeze(), Validity::NonNullable);
95 let nulls = self.validity.freeze();
96
97 let validity = Validity::from_bit_buffer(nulls, dtype.nullability());
98
99 debug_assert!(
103 offsets.statistics().compute_is_sorted().unwrap_or(false),
104 "VarBinBuilder offsets must be sorted"
105 );
106 offsets
107 .statistics()
108 .set(Stat::IsSorted, Precision::Exact(true.into()));
109
110 unsafe {
116 VarBinArray::new_unchecked(offsets.into_array(), self.data.freeze(), dtype, validity)
117 }
118 }
119}
120
#[cfg(test)]
mod tests {
    use vortex_dtype::DType;
    use vortex_dtype::Nullability::Nullable;
    use vortex_error::VortexResult;

    use crate::arrays::varbin::builder::VarBinBuilder;
    use crate::expr::stats::Precision;
    use crate::expr::stats::Stat;
    use crate::expr::stats::StatsProviderExt;
    use crate::scalar::Scalar;

    /// Values and nulls appended through `append` come back in order.
    #[test]
    fn test_builder() {
        let mut builder = VarBinBuilder::<i32>::with_capacity(0);
        builder.append(Some(b"hello"));
        builder.append(None);
        builder.append(Some(b"world"));
        let array = builder.finish(DType::Utf8(Nullable));

        assert_eq!(array.len(), 3);
        assert_eq!(array.dtype().nullability(), Nullable);
        assert_eq!(
            array.scalar_at(0).unwrap(),
            Scalar::utf8("hello".to_string(), Nullable)
        );
        assert!(array.scalar_at(1).unwrap().is_null());
        // The value after the null must still resolve to its own bytes.
        assert_eq!(
            array.scalar_at(2).unwrap(),
            Scalar::utf8("world".to_string(), Nullable)
        );
    }

    /// A run of nulls occupies no bytes and does not disturb neighbouring values.
    #[test]
    fn append_n_nulls_keeps_neighbouring_values() {
        let mut builder = VarBinBuilder::<i32>::with_capacity(4);
        builder.append_value(b"ab");
        builder.append_n_nulls(2);
        builder.append_value(b"cd");
        let array = builder.finish(DType::Utf8(Nullable));

        assert_eq!(array.len(), 4);
        assert_eq!(
            array.scalar_at(0).unwrap(),
            Scalar::utf8("ab".to_string(), Nullable)
        );
        assert!(array.scalar_at(1).unwrap().is_null());
        assert!(array.scalar_at(2).unwrap().is_null());
        assert_eq!(
            array.scalar_at(3).unwrap(),
            Scalar::utf8("cd".to_string(), Nullable)
        );
    }

    /// Offsets passed to `append_values` are rebased by the bytes already present.
    #[test]
    fn append_values_rebases_offsets() {
        let mut builder = VarBinBuilder::<i32>::new();
        builder.append_value(b"hello");
        builder.append_values(b"foobar", [3i32, 6].into_iter(), 2);
        let array = builder.finish(DType::Utf8(Nullable));

        assert_eq!(array.len(), 3);
        assert_eq!(
            array.scalar_at(0).unwrap(),
            Scalar::utf8("hello".to_string(), Nullable)
        );
        assert_eq!(
            array.scalar_at(1).unwrap(),
            Scalar::utf8("foo".to_string(), Nullable)
        );
        assert_eq!(
            array.scalar_at(2).unwrap(),
            Scalar::utf8("bar".to_string(), Nullable)
        );
    }

    /// `finish` records an exact `IsSorted` statistic on the offsets.
    #[test]
    fn offsets_have_is_sorted_stat() -> VortexResult<()> {
        let mut builder = VarBinBuilder::<i32>::with_capacity(0);
        builder.append_value(b"aaa");
        builder.append_null();
        builder.append_value(b"bbb");
        let array = builder.finish(DType::Utf8(Nullable));

        let is_sorted = array
            .offsets()
            .statistics()
            .with_typed_stats_set(|s| s.get_as::<bool>(Stat::IsSorted));
        assert_eq!(is_sorted, Some(Precision::Exact(true)));
        Ok(())
    }

    /// The statistic is also recorded when nothing was appended.
    #[test]
    fn empty_builder_offsets_have_is_sorted_stat() -> VortexResult<()> {
        let builder = VarBinBuilder::<i32>::new();
        let array = builder.finish(DType::Utf8(Nullable));

        let is_sorted = array
            .offsets()
            .statistics()
            .with_typed_stats_set(|s| s.get_as::<bool>(Stat::IsSorted));
        assert_eq!(is_sorted, Some(Precision::Exact(true)));
        Ok(())
    }
}