vortex_vector/binaryview/
vector.rs1use std::fmt::Debug;
7use std::ops::BitAnd;
8use std::ops::RangeBounds;
9use std::sync::Arc;
10
11use vortex_buffer::Alignment;
12use vortex_buffer::Buffer;
13use vortex_buffer::ByteBuffer;
14use vortex_error::VortexExpect;
15use vortex_error::VortexResult;
16use vortex_error::vortex_ensure;
17use vortex_mask::Mask;
18
19use crate::VectorOps;
20use crate::binaryview::BinaryViewScalar;
21use crate::binaryview::BinaryViewType;
22use crate::binaryview::vector_mut::BinaryViewVectorMut;
23use crate::binaryview::view::BinaryView;
24use crate::binaryview::view::validate_views;
25
26#[derive(Debug, Clone)]
30pub struct BinaryViewVector<T: BinaryViewType> {
31 views: Buffer<BinaryView>,
33 buffers: Arc<Box<[ByteBuffer]>>,
35 validity: Mask,
37 _marker: std::marker::PhantomData<T>,
39}
40
41impl<T: BinaryViewType> BinaryViewVector<T> {
42 pub unsafe fn new_unchecked(
52 views: Buffer<BinaryView>,
53 buffers: Arc<Box<[ByteBuffer]>>,
54 validity: Mask,
55 ) -> Self {
56 if cfg!(debug_assertions) {
57 Self::new(views, buffers, validity)
58 } else {
59 Self {
60 views,
61 validity,
62 buffers,
63 _marker: std::marker::PhantomData,
64 }
65 }
66 }
67
68 pub fn new(views: Buffer<BinaryView>, buffers: Arc<Box<[ByteBuffer]>>, validity: Mask) -> Self {
75 Self::try_new(views, buffers, validity).vortex_expect("Failed to create `BinaryViewVector`")
76 }
77
78 pub fn try_new(
89 views: Buffer<BinaryView>,
90 buffers: Arc<Box<[ByteBuffer]>>,
91 validity: Mask,
92 ) -> VortexResult<Self> {
93 vortex_ensure!(
94 views.len() == validity.len(),
95 "views buffer length {} != validity length {}",
96 views.len(),
97 validity.len()
98 );
99
100 validate_views(
101 &views,
102 &*buffers,
103 |index| validity.value(index),
104 T::validate,
105 )?;
106
107 Ok(Self {
108 views,
109 buffers,
110 validity,
111 _marker: std::marker::PhantomData,
112 })
113 }
114
115 pub fn into_parts(self) -> (Buffer<BinaryView>, Arc<Box<[ByteBuffer]>>, Mask) {
117 (self.views, self.buffers, self.validity)
118 }
119
120 pub fn get(&self, index: usize) -> Option<T::Scalar> {
124 if !self.validity.value(index) {
125 return None;
126 }
127
128 let view = &self.views[index];
129 if view.is_inlined() {
130 let view = view.as_inlined();
131
132 let buffer = self
134 .views
135 .clone()
136 .into_byte_buffer()
137 .aligned(Alignment::none())
138 .slice_ref(&view.data[..view.size as usize]);
139
140 Some(unsafe { T::scalar_from_buffer_unchecked(buffer) })
144 } else {
145 let view_ref = view.as_view();
147 let buffer = &self.buffers[view_ref.buffer_index as usize];
148
149 let start = view_ref.offset as usize;
150 let length = view_ref.size as usize;
151 let buffer_slice = buffer.slice(start..start + length);
152
153 Some(unsafe { T::scalar_from_buffer_unchecked(buffer_slice) })
157 }
158 }
159
160 pub fn get_ref(&self, index: usize) -> Option<&T::Slice> {
164 if !self.validity.value(index) {
165 return None;
166 }
167
168 let view = &self.views[index];
169 if view.is_inlined() {
170 let view = view.as_inlined();
171 Some(unsafe { T::from_bytes_unchecked(&view.data[..view.size as usize]) })
175 } else {
176 let view_ref = view.as_view();
178 let buffer = &self.buffers[view_ref.buffer_index as usize];
179
180 let start = view_ref.offset as usize;
181 let length = view_ref.size as usize;
182
183 Some(unsafe { T::from_bytes_unchecked(&buffer.as_bytes()[start..start + length]) })
187 }
188 }
189
190 pub fn buffers(&self) -> &Arc<Box<[ByteBuffer]>> {
192 &self.buffers
193 }
194
195 pub fn views(&self) -> &Buffer<BinaryView> {
197 &self.views
198 }
199}
200
201impl<T: BinaryViewType> VectorOps for BinaryViewVector<T> {
202 type Mutable = BinaryViewVectorMut<T>;
203 type Scalar = BinaryViewScalar<T>;
204
205 fn len(&self) -> usize {
206 self.views.len()
207 }
208
209 fn validity(&self) -> &Mask {
210 &self.validity
211 }
212
213 fn mask_validity(&mut self, mask: &Mask) {
214 self.validity = self.validity.bitand(mask);
215 }
216
217 fn scalar_at(&self, index: usize) -> BinaryViewScalar<T> {
218 assert!(index < self.len());
219 BinaryViewScalar::<T>::new(self.get(index))
220 }
221
222 fn slice(&self, _range: impl RangeBounds<usize> + Clone + Debug) -> Self {
223 todo!()
224 }
225
226 fn clear(&mut self) {
227 self.views.clear();
228 self.validity = Mask::new_true(0);
229 self.buffers = Arc::new(Box::new([]));
230 }
231
232 fn try_into_mut(self) -> Result<BinaryViewVectorMut<T>, Self> {
233 let views_mut = match self.views.try_into_mut() {
234 Ok(views_mut) => views_mut,
235 Err(views) => {
236 return Err(Self {
237 views,
238 validity: self.validity,
239 buffers: self.buffers,
240 _marker: std::marker::PhantomData,
241 });
242 }
243 };
244
245 let validity_mut = match self.validity.try_into_mut() {
246 Ok(validity_mut) => validity_mut,
247 Err(validity) => {
248 return Err(Self {
249 views: views_mut.freeze(),
250 validity,
251 buffers: self.buffers,
252 _marker: std::marker::PhantomData,
253 });
254 }
255 };
256
257 let buffers_mut = match Arc::try_unwrap(self.buffers) {
258 Ok(buffers) => buffers.into_vec(),
259 Err(buffers) => {
260 buffers.iter().cloned().collect()
262 }
263 };
264
265 unsafe {
268 Ok(BinaryViewVectorMut::new_unchecked(
269 views_mut,
270 validity_mut,
271 buffers_mut,
272 ))
273 }
274 }
275
276 fn into_mut(self) -> BinaryViewVectorMut<T> {
277 let views_mut = self.views.into_mut();
278 let validity_mut = self.validity.into_mut();
279
280 let buffers_mut = Arc::try_unwrap(self.buffers)
283 .unwrap_or_else(|arc| (*arc).clone())
284 .into_vec();
285
286 unsafe { BinaryViewVectorMut::new_unchecked(views_mut, validity_mut, buffers_mut) }
289 }
290}
291
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use vortex_buffer::ByteBuffer;
    use vortex_buffer::buffer;
    use vortex_mask::Mask;

    use crate::VectorMutOps;
    use crate::VectorOps;
    use crate::binaryview::StringVector;
    use crate::binaryview::StringVectorMut;
    use crate::binaryview::view::BinaryView;

    /// A data-buffer list with no buffers in it.
    fn no_buffers() -> Arc<Box<[ByteBuffer]>> {
        Arc::new(Box::new([]))
    }

    /// The validity mask must be exactly as long as the views buffer.
    #[test]
    #[should_panic(expected = "views buffer length 1 != validity length 100")]
    fn test_try_new_mismatch_validity_len() {
        let views = buffer![BinaryView::new_inlined(b"inlined")];
        let validity = Mask::new_true(100);

        StringVector::try_new(views, no_buffers(), validity).unwrap();
    }

    /// A view may not name a data buffer that does not exist.
    #[test]
    #[should_panic(
        expected = "view at index 0 references invalid buffer: 100 out of bounds for BinaryViewVector with 0 buffers"
    )]
    fn test_try_new_invalid_buffer_offset() {
        let views = buffer![BinaryView::make_view(b"bad buffer ptr", 100, 0)];
        let validity = Mask::new_true(1);

        StringVector::try_new(views, no_buffers(), validity).unwrap();
    }

    /// A view may not start past the end of the data buffer it names.
    #[test]
    #[should_panic(expected = "start offset 4294967295 out of bounds for buffer 0 with size 19")]
    fn test_try_new_invalid_length() {
        let views = buffer![BinaryView::make_view(b"bad buffer ptr", 0, u32::MAX)];
        let buffers: Arc<Box<[ByteBuffer]>> =
            Arc::new(Box::new([ByteBuffer::copy_from(b"a very short buffer")]));
        let validity = Mask::new_true(1);

        StringVector::try_new(views, buffers, validity).unwrap();
    }

    /// Inlined bytes of a string vector must be valid UTF-8.
    #[test]
    #[should_panic(expected = "view at index 0: inlined bytes failed utf-8 validation")]
    fn test_try_new_invalid_utf8_inlined() {
        let views = buffer![BinaryView::new_inlined(b"\x80")];
        let validity = Mask::new_true(1);

        StringVector::try_new(views, no_buffers(), validity).unwrap();
    }

    /// Bytes stored in a data buffer must be valid UTF-8 as well.
    #[test]
    #[should_panic(expected = "view at index 0: outlined bytes failed utf-8 validation")]
    fn test_try_new_invalid_utf8_outlined() {
        // 13 bytes, so the value is built with `make_view` and backed by a data buffer.
        let sequence = b"\xff".repeat(13);

        let views = buffer![BinaryView::make_view(&sequence, 0, 0)];
        let buffers: Arc<Box<[ByteBuffer]>> =
            Arc::new(Box::new([ByteBuffer::copy_from(sequence)]));
        let validity = Mask::new_true(1);

        StringVector::try_new(views, buffers, validity).unwrap();
    }

    /// `try_into_mut` fails while a clone of the vector is alive and succeeds once the
    /// clone has been dropped.
    #[test]
    fn test_try_into_mut() {
        let mut builder = StringVectorMut::with_capacity(5);
        builder.append_nulls(2);
        builder.append_values("an example value", 2);
        builder.append_values("another example value", 1);

        let frozen = builder.freeze();

        // A second handle keeps the underlying storage shared.
        let second_handle = frozen.clone();

        // While shared, the conversion hands the original vector back.
        let frozen = frozen.try_into_mut().unwrap_err();

        drop(second_handle);

        // With the other handle gone the conversion goes through.
        assert!(frozen.try_into_mut().is_ok());
    }
}