vortex_vector/binaryview/
vector.rs1use std::fmt::Debug;
7use std::ops::RangeBounds;
8use std::sync::Arc;
9
10use vortex_buffer::{Alignment, Buffer, ByteBuffer};
11use vortex_error::{VortexExpect, VortexResult, vortex_ensure};
12use vortex_mask::Mask;
13
14use crate::binaryview::vector_mut::BinaryViewVectorMut;
15use crate::binaryview::view::{BinaryView, validate_views};
16use crate::binaryview::{BinaryViewScalar, BinaryViewType};
17use crate::{Scalar, VectorOps};
18
19#[derive(Debug, Clone)]
23pub struct BinaryViewVector<T: BinaryViewType> {
24 views: Buffer<BinaryView>,
26 buffers: Arc<Box<[ByteBuffer]>>,
28 validity: Mask,
30 _marker: std::marker::PhantomData<T>,
32}
33
34impl<T: BinaryViewType> BinaryViewVector<T> {
35 pub unsafe fn new_unchecked(
45 views: Buffer<BinaryView>,
46 buffers: Arc<Box<[ByteBuffer]>>,
47 validity: Mask,
48 ) -> Self {
49 if cfg!(debug_assertions) {
50 Self::new(views, buffers, validity)
51 } else {
52 Self {
53 views,
54 validity,
55 buffers,
56 _marker: std::marker::PhantomData,
57 }
58 }
59 }
60
61 pub fn new(views: Buffer<BinaryView>, buffers: Arc<Box<[ByteBuffer]>>, validity: Mask) -> Self {
68 Self::try_new(views, buffers, validity).vortex_expect("Failed to create `BinaryViewVector`")
69 }
70
71 pub fn try_new(
82 views: Buffer<BinaryView>,
83 buffers: Arc<Box<[ByteBuffer]>>,
84 validity: Mask,
85 ) -> VortexResult<Self> {
86 vortex_ensure!(
87 views.len() == validity.len(),
88 "views buffer length {} != validity length {}",
89 views.len(),
90 validity.len()
91 );
92
93 validate_views(
94 &views,
95 &*buffers,
96 |index| validity.value(index),
97 T::validate,
98 )?;
99
100 Ok(Self {
101 views,
102 buffers,
103 validity,
104 _marker: std::marker::PhantomData,
105 })
106 }
107
108 pub fn into_parts(self) -> (Buffer<BinaryView>, Arc<Box<[ByteBuffer]>>, Mask) {
110 (self.views, self.buffers, self.validity)
111 }
112
113 pub fn get(&self, index: usize) -> Option<T::Scalar> {
117 if !self.validity.value(index) {
118 return None;
119 }
120
121 let view = &self.views[index];
122 if view.is_inlined() {
123 let view = view.as_inlined();
124
125 let buffer = self
127 .views
128 .clone()
129 .into_byte_buffer()
130 .aligned(Alignment::none())
131 .slice_ref(&view.data[..view.size as usize]);
132
133 Some(unsafe { T::scalar_from_buffer_unchecked(buffer) })
137 } else {
138 let view_ref = view.as_view();
140 let buffer = &self.buffers[view_ref.buffer_index as usize];
141
142 let start = view_ref.offset as usize;
143 let length = view_ref.size as usize;
144 let buffer_slice = buffer.slice(start..start + length);
145
146 Some(unsafe { T::scalar_from_buffer_unchecked(buffer_slice) })
150 }
151 }
152
153 pub fn get_ref(&self, index: usize) -> Option<&T::Slice> {
157 if !self.validity.value(index) {
158 return None;
159 }
160
161 let view = &self.views[index];
162 if view.is_inlined() {
163 let view = view.as_inlined();
164 Some(unsafe { T::from_bytes_unchecked(&view.data[..view.size as usize]) })
168 } else {
169 let view_ref = view.as_view();
171 let buffer = &self.buffers[view_ref.buffer_index as usize];
172
173 let start = view_ref.offset as usize;
174 let length = view_ref.size as usize;
175
176 Some(unsafe { T::from_bytes_unchecked(&buffer.as_bytes()[start..start + length]) })
180 }
181 }
182
183 pub fn buffers(&self) -> &Arc<Box<[ByteBuffer]>> {
185 &self.buffers
186 }
187
188 pub fn views(&self) -> &Buffer<BinaryView> {
190 &self.views
191 }
192}
193
194impl<T: BinaryViewType> VectorOps for BinaryViewVector<T> {
195 type Mutable = BinaryViewVectorMut<T>;
196
197 fn len(&self) -> usize {
198 self.views.len()
199 }
200
201 fn validity(&self) -> &Mask {
202 &self.validity
203 }
204
205 fn scalar_at(&self, index: usize) -> Scalar {
206 assert!(index < self.len());
207 BinaryViewScalar::<T>::from(self.get(index)).into()
208 }
209
210 fn slice(&self, _range: impl RangeBounds<usize> + Clone + Debug) -> Self {
211 todo!()
212 }
213
214 fn try_into_mut(self) -> Result<BinaryViewVectorMut<T>, Self> {
215 let views_mut = match self.views.try_into_mut() {
216 Ok(views_mut) => views_mut,
217 Err(views) => {
218 return Err(Self {
219 views,
220 validity: self.validity,
221 buffers: self.buffers,
222 _marker: std::marker::PhantomData,
223 });
224 }
225 };
226
227 let validity_mut = match self.validity.try_into_mut() {
228 Ok(validity_mut) => validity_mut,
229 Err(validity) => {
230 return Err(Self {
231 views: views_mut.freeze(),
232 validity,
233 buffers: self.buffers,
234 _marker: std::marker::PhantomData,
235 });
236 }
237 };
238
239 let buffers_mut = match Arc::try_unwrap(self.buffers) {
240 Ok(buffers) => buffers.into_vec(),
241 Err(buffers) => {
242 buffers.iter().cloned().collect()
244 }
245 };
246
247 unsafe {
250 Ok(BinaryViewVectorMut::new_unchecked(
251 views_mut,
252 validity_mut,
253 buffers_mut,
254 ))
255 }
256 }
257
258 fn into_mut(self) -> BinaryViewVectorMut<T> {
259 let views_mut = self.views.into_mut();
260 let validity_mut = self.validity.into_mut();
261
262 let buffers_mut = Arc::try_unwrap(self.buffers)
265 .unwrap_or_else(|arc| (*arc).clone())
266 .into_vec();
267
268 unsafe { BinaryViewVectorMut::new_unchecked(views_mut, validity_mut, buffers_mut) }
271 }
272}
273
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use vortex_buffer::{ByteBuffer, buffer};
    use vortex_mask::Mask;

    use crate::binaryview::view::BinaryView;
    use crate::binaryview::{StringVector, StringVectorMut};
    use crate::{VectorMutOps, VectorOps};

    /// One view but a validity mask of length 100 must be rejected.
    #[test]
    #[should_panic(expected = "views buffer length 1 != validity length 100")]
    fn test_try_new_mismatch_validity_len() {
        let result = StringVector::try_new(
            buffer![BinaryView::new_inlined(b"inlined")],
            Arc::new(Box::new([])),
            Mask::new_true(100),
        );
        result.unwrap();
    }

    /// A view naming buffer 100 when there are no buffers at all must be rejected.
    #[test]
    #[should_panic(
        expected = "view at index 0 references invalid buffer: 100 out of bounds for BinaryViewVector with 0 buffers"
    )]
    fn test_try_new_invalid_buffer_offset() {
        let result = StringVector::try_new(
            buffer![BinaryView::make_view(b"bad buffer ptr", 100, 0)],
            Arc::new(Box::new([])),
            Mask::new_true(1),
        );
        result.unwrap();
    }

    /// A view whose start offset lies far beyond the end of its buffer must be rejected.
    #[test]
    #[should_panic(expected = "start offset 4294967295 out of bounds for buffer 0 with size 19")]
    fn test_try_new_invalid_length() {
        let result = StringVector::try_new(
            buffer![BinaryView::make_view(b"bad buffer ptr", 0, u32::MAX)],
            Arc::new(Box::new([ByteBuffer::copy_from(b"a very short buffer")])),
            Mask::new_true(1),
        );
        result.unwrap();
    }

    /// Inlined bytes that are not valid UTF-8 must be rejected for string vectors.
    #[test]
    #[should_panic(expected = "view at index 0: inlined bytes failed utf-8 validation")]
    fn test_try_new_invalid_utf8_inlined() {
        let result = StringVector::try_new(
            buffer![BinaryView::new_inlined(b"\x80")],
            Arc::new(Box::new([])),
            Mask::new_true(1),
        );
        result.unwrap();
    }

    /// Non-inlined bytes that are not valid UTF-8 must be rejected for string vectors.
    #[test]
    #[should_panic(expected = "view at index 0: outlined bytes failed utf-8 validation")]
    fn test_try_new_invalid_utf8_outlined() {
        // 13 bytes of 0xff: invalid UTF-8 that the test stores in a data buffer.
        let invalid_bytes = b"\xff".repeat(13);
        let result = StringVector::try_new(
            buffer![BinaryView::make_view(&invalid_bytes, 0, 0)],
            Arc::new(Box::new([ByteBuffer::copy_from(invalid_bytes)])),
            Mask::new_true(1),
        );
        result.unwrap();
    }

    /// `try_into_mut` fails while a clone is alive and succeeds once the clone is dropped.
    #[test]
    fn test_try_into_mut() {
        let mut builder = StringVectorMut::with_capacity(5);
        builder.append_nulls(2);
        builder.append_values("an example value", 2);
        builder.append_values("another example value", 1);

        let frozen = builder.freeze();

        // A second handle keeps the underlying storage shared.
        let second_handle = frozen.clone();

        // While shared, the conversion must hand the vector back as an error.
        let frozen = frozen.try_into_mut().unwrap_err();

        drop(second_handle);

        // With the clone gone the vector can be converted.
        assert!(frozen.try_into_mut().is_ok());
    }
}