vortex_array/arrays/varbinview/compute/
zip.rs1use std::ops::Range;
5
6use vortex_buffer::BufferMut;
7use vortex_error::VortexResult;
8use vortex_error::vortex_bail;
9use vortex_mask::AllOr;
10use vortex_mask::Mask;
11
12use crate::Array;
13use crate::ArrayRef;
14use crate::ExecutionCtx;
15use crate::arrays::BinaryView;
16use crate::arrays::VarBinViewArray;
17use crate::arrays::VarBinViewVTable;
18use crate::builders::DeduplicatedBuffers;
19use crate::builders::LazyBitBufferBuilder;
20use crate::expr::ZipKernel;
21
22impl ZipKernel for VarBinViewVTable {
25 fn zip(
26 if_true: &VarBinViewArray,
27 if_false: &dyn Array,
28 mask: &Mask,
29 _ctx: &mut ExecutionCtx,
30 ) -> VortexResult<Option<ArrayRef>> {
31 let Some(if_false) = if_false.as_opt::<VarBinViewVTable>() else {
32 return Ok(None);
33 };
34
35 if !if_true.dtype().eq_ignore_nullability(if_false.dtype()) {
36 vortex_bail!("input arrays to zip must have the same dtype");
37 }
38
39 let len = if_true.len();
40 let dtype = if_true
41 .dtype()
42 .union_nullability(if_false.dtype().nullability());
43
44 let mut buffers = DeduplicatedBuffers::default();
47 let true_lookup =
48 buffers.extend_from_iter(if_true.buffers().iter().map(|b| b.as_host().clone()));
49 let false_lookup =
50 buffers.extend_from_iter(if_false.buffers().iter().map(|b| b.as_host().clone()));
51
52 let mut views_builder = BufferMut::<BinaryView>::with_capacity(len);
53 let mut validity_builder = LazyBitBufferBuilder::new(len);
54
55 let true_validity = if_true.validity_mask()?;
56 let false_validity = if_false.validity_mask()?;
57
58 match mask.slices() {
59 AllOr::All => push_range(
60 if_true,
61 &true_lookup,
62 &true_validity,
63 0..len,
64 &mut views_builder,
65 &mut validity_builder,
66 ),
67 AllOr::None => push_range(
68 if_false,
69 &false_lookup,
70 &false_validity,
71 0..len,
72 &mut views_builder,
73 &mut validity_builder,
74 ),
75 AllOr::Some(slices) => {
76 let mut pos = 0;
77 for (start, end) in slices {
78 if pos < *start {
79 push_range(
80 if_false,
81 &false_lookup,
82 &false_validity,
83 pos..*start,
84 &mut views_builder,
85 &mut validity_builder,
86 );
87 }
88 push_range(
89 if_true,
90 &true_lookup,
91 &true_validity,
92 *start..*end,
93 &mut views_builder,
94 &mut validity_builder,
95 );
96 pos = *end;
97 }
98 if pos < len {
99 push_range(
100 if_false,
101 &false_lookup,
102 &false_validity,
103 pos..len,
104 &mut views_builder,
105 &mut validity_builder,
106 );
107 }
108 }
109 }
110
111 let validity = validity_builder.finish_with_nullability(dtype.nullability());
112
113 let array = unsafe {
116 VarBinViewArray::new_unchecked(
117 views_builder.freeze(),
118 buffers.finish(),
119 dtype,
120 validity,
121 )
122 };
123
124 Ok(Some(array.to_array()))
125 }
126}
127
128fn push_range(
129 array: &VarBinViewArray,
130 buffer_lookup: &[u32],
131 validity: &Mask,
132 range: Range<usize>,
133 views_builder: &mut BufferMut<BinaryView>,
134 validity_builder: &mut LazyBitBufferBuilder,
135) {
136 let views = array.views();
137
138 match validity.bit_buffer() {
139 AllOr::All => {
140 for idx in range {
141 push_view(
142 views[idx],
143 buffer_lookup,
144 true,
145 views_builder,
146 validity_builder,
147 );
148 }
149 }
150 AllOr::None => {
151 for _ in range {
152 push_view(
153 BinaryView::empty_view(),
154 buffer_lookup,
155 false,
156 views_builder,
157 validity_builder,
158 );
159 }
160 }
161 AllOr::Some(bit_buffer) => {
162 for idx in range {
163 let is_valid = bit_buffer.value(idx);
164 push_view(
165 views[idx],
166 buffer_lookup,
167 is_valid,
168 views_builder,
169 validity_builder,
170 );
171 }
172 }
173 }
174}
175
176#[inline]
177fn push_view(
178 view: BinaryView,
179 buffer_lookup: &[u32],
180 is_valid: bool,
181 views_builder: &mut BufferMut<BinaryView>,
182 validity_builder: &mut LazyBitBufferBuilder,
183) {
184 if !is_valid {
185 views_builder.push(BinaryView::empty_view());
186 validity_builder.append_null();
187 return;
188 }
189
190 let adjusted = if view.is_inlined() {
191 view
192 } else {
193 let view_ref = view.as_view();
194 view_ref
195 .with_buffer_and_offset(
196 buffer_lookup[view_ref.buffer_index as usize],
197 view_ref.offset,
198 )
199 .into()
200 };
201
202 views_builder.push(adjusted);
203 validity_builder.append_non_null();
204}
205
#[cfg(test)]
mod tests {
    use vortex_dtype::DType;
    use vortex_dtype::Nullability;
    use vortex_mask::Mask;

    use crate::accessor::ArrayAccessor;
    use crate::arrays::VarBinViewArray;
    use crate::canonical::ToCanonical;
    use crate::compute::zip;

    /// Zips two nullable UTF-8 view arrays that mix long strings, short strings and nulls,
    /// then checks the selected values, the output length and the output dtype.
    #[test]
    fn zip_varbinview_kernel_zips() {
        let nullable_utf8 = || DType::Utf8(Nullability::Nullable);

        let when_true = VarBinViewArray::from_iter(
            [
                Some("aaaaaaaaaaaaa_long"),
                Some("short"),
                None,
                Some("bbbbbbbbbbbbbbbb_long"),
                Some("tiny"),
                Some("cccccccccccccccc_long"),
            ],
            nullable_utf8(),
        );

        let when_false = VarBinViewArray::from_iter(
            [
                Some("dddddddddddddddd_long"),
                Some("eeeeeeeeeeeeeeee_long"),
                Some("ffff"),
                Some("gggggggggggggggg_long"),
                None,
                Some("hhhhhhhhhhhhhhhh_long"),
            ],
            nullable_utf8(),
        );

        let mask = Mask::from_iter([true, false, true, false, false, true]);

        let zipped = zip(when_true.as_ref(), when_false.as_ref(), &mask)
            .unwrap()
            .to_varbinview();

        let actual = zipped.with_iterator(|rows| {
            rows.map(|row| row.map(|bytes| String::from_utf8(bytes.to_vec()).unwrap()))
                .collect::<Vec<_>>()
        });

        // Rows 0, 2 and 5 come from `when_true`; rows 1, 3 and 4 come from `when_false`.
        let expected: Vec<Option<String>> = [
            Some("aaaaaaaaaaaaa_long"),
            Some("eeeeeeeeeeeeeeee_long"),
            None,
            Some("gggggggggggggggg_long"),
            None,
            Some("cccccccccccccccc_long"),
        ]
        .into_iter()
        .map(|value| value.map(String::from))
        .collect();

        assert_eq!(actual, expected);
        assert_eq!(zipped.len(), mask.len());
        assert_eq!(zipped.dtype(), &nullable_utf8());
    }
}