vortex_array/arrays/varbin/
builder.rs1use num_traits::AsPrimitive;
5use vortex_buffer::BitBufferMut;
6use vortex_buffer::BufferMut;
7use vortex_error::vortex_panic;
8
9use crate::IntoArray;
10#[cfg(debug_assertions)]
11use crate::LEGACY_SESSION;
12#[cfg(debug_assertions)]
13use crate::VortexSessionExecute;
14use crate::arrays::PrimitiveArray;
15use crate::arrays::VarBinArray;
16use crate::dtype::DType;
17use crate::dtype::IntegerPType;
18use crate::expr::stats::Precision;
19use crate::expr::stats::Stat;
20use crate::validity::Validity;
21
22pub struct VarBinBuilder<O: IntegerPType> {
23 offsets: BufferMut<O>,
24 data: BufferMut<u8>,
25 validity: BitBufferMut,
26}
27
28impl<O: IntegerPType> Default for VarBinBuilder<O> {
29 fn default() -> Self {
30 Self::new()
31 }
32}
33
34impl<O: IntegerPType> VarBinBuilder<O> {
35 pub fn new() -> Self {
36 Self::with_capacity(0)
37 }
38
39 pub fn with_capacity(len: usize) -> Self {
40 let mut offsets = BufferMut::with_capacity(len + 1);
41 offsets.push(O::zero());
42 Self {
43 offsets,
44 data: BufferMut::empty(),
45 validity: BitBufferMut::with_capacity(len),
46 }
47 }
48
49 #[inline]
50 pub fn append(&mut self, value: Option<&[u8]>) {
51 match value {
52 Some(v) => self.append_value(v),
53 None => self.append_null(),
54 }
55 }
56
57 #[inline]
58 pub fn append_value(&mut self, value: impl AsRef<[u8]>) {
59 let slice = value.as_ref();
60 self.offsets
61 .push(O::from(self.data.len() + slice.len()).unwrap_or_else(|| {
62 vortex_panic!(
63 "Failed to convert sum of {} and {} to offset of type {}",
64 self.data.len(),
65 slice.len(),
66 std::any::type_name::<O>()
67 )
68 }));
69 self.data.extend_from_slice(slice);
70 self.validity.append_true();
71 }
72
73 #[inline]
74 pub fn append_null(&mut self) {
75 self.offsets.push(self.offsets[self.offsets.len() - 1]);
76 self.validity.append_false();
77 }
78
79 #[inline]
80 pub fn append_n_nulls(&mut self, n: usize) {
81 self.offsets.push_n(self.offsets[self.offsets.len() - 1], n);
82 self.validity.append_n(false, n);
83 }
84
85 #[inline]
86 pub fn append_values(&mut self, values: &[u8], end_offsets: impl Iterator<Item = O>, num: usize)
87 where
88 O: 'static,
89 usize: AsPrimitive<O>,
90 {
91 self.offsets
92 .extend(end_offsets.map(|offset| offset + self.data.len().as_()));
93 self.data.extend_from_slice(values);
94 self.validity.append_n(true, num);
95 }
96
97 pub fn finish(self, dtype: DType) -> VarBinArray {
98 let offsets = PrimitiveArray::new(self.offsets.freeze(), Validity::NonNullable);
99 let nulls = self.validity.freeze();
100
101 let validity = Validity::from_bit_buffer(nulls, dtype.nullability());
102
103 #[cfg(debug_assertions)]
107 {
108 let offsets_are_sorted = offsets
109 .statistics()
110 .compute_is_sorted(&mut LEGACY_SESSION.create_execution_ctx())
111 .unwrap_or(false);
112 debug_assert!(offsets_are_sorted, "VarBinBuilder offsets must be sorted");
113 }
114 offsets
115 .statistics()
116 .set(Stat::IsSorted, Precision::Exact(true.into()));
117
118 unsafe {
124 VarBinArray::new_unchecked(offsets.into_array(), self.data.freeze(), dtype, validity)
125 }
126 }
127}
128
#[cfg(test)]
mod tests {
    use vortex_error::VortexResult;

    use crate::LEGACY_SESSION;
    use crate::VortexSessionExecute;
    use crate::arrays::varbin::VarBinArrayExt;
    use crate::arrays::varbin::builder::VarBinBuilder;
    use crate::dtype::DType;
    use crate::dtype::Nullability::Nullable;
    use crate::expr::stats::Precision;
    use crate::expr::stats::Stat;
    use crate::expr::stats::StatsProviderExt;
    use crate::scalar::Scalar;

    /// Builds `["hello", null, "world"]` and checks every element, including
    /// the value that follows the null (whose start offset is the repeated one).
    #[test]
    fn test_builder() {
        let mut builder = VarBinBuilder::<i32>::with_capacity(0);
        builder.append(Some(b"hello"));
        builder.append(None);
        builder.append(Some(b"world"));
        let array = builder.finish(DType::Utf8(Nullable));

        assert_eq!(array.len(), 3);
        assert_eq!(array.dtype().nullability(), Nullable);
        assert_eq!(
            array
                .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
                .unwrap(),
            Scalar::utf8("hello".to_string(), Nullable)
        );
        assert!(
            array
                .execute_scalar(1, &mut LEGACY_SESSION.create_execution_ctx())
                .unwrap()
                .is_null()
        );
        // The element after the null must not be shifted or truncated.
        assert_eq!(
            array
                .execute_scalar(2, &mut LEGACY_SESSION.create_execution_ctx())
                .unwrap(),
            Scalar::utf8("world".to_string(), Nullable)
        );
    }

    /// The offsets of a finished array carry an exact `IsSorted = true` stat.
    #[test]
    fn offsets_have_is_sorted_stat() -> VortexResult<()> {
        let mut builder = VarBinBuilder::<i32>::with_capacity(0);
        builder.append_value(b"aaa");
        builder.append_null();
        builder.append_value(b"bbb");
        let array = builder.finish(DType::Utf8(Nullable));

        let is_sorted = array
            .offsets()
            .statistics()
            .with_typed_stats_set(|s| s.get_as::<bool>(Stat::IsSorted));
        assert_eq!(is_sorted, Some(Precision::Exact(true)));
        Ok(())
    }

    /// The stat is also set when nothing was appended, i.e. when the offsets
    /// hold only the leading zero.
    #[test]
    fn empty_builder_offsets_have_is_sorted_stat() -> VortexResult<()> {
        let builder = VarBinBuilder::<i32>::new();
        let array = builder.finish(DType::Utf8(Nullable));

        let is_sorted = array
            .offsets()
            .statistics()
            .with_typed_stats_set(|s| s.get_as::<bool>(Stat::IsSorted));
        assert_eq!(is_sorted, Some(Precision::Exact(true)));
        Ok(())
    }
}