vortex_vector/binaryview/
vector_mut.rs1use std::sync::Arc;
7
8use vortex_buffer::BufferMut;
9use vortex_buffer::ByteBuffer;
10use vortex_buffer::ByteBufferMut;
11use vortex_error::VortexExpect;
12use vortex_error::VortexResult;
13use vortex_error::vortex_ensure;
14use vortex_mask::MaskMut;
15
16use crate::VectorMutOps;
17use crate::VectorOps;
18use crate::binaryview::BinaryViewScalar;
19use crate::binaryview::BinaryViewType;
20use crate::binaryview::vector::BinaryViewVector;
21use crate::binaryview::view::BinaryView;
22use crate::binaryview::view::validate_views;
23
/// Initial capacity, in bytes (2 MiB), of a freshly allocated open data buffer.
const BUFFER_CAPACITY: usize = 2 << 20;
26
27#[derive(Clone, Debug)]
31pub struct BinaryViewVectorMut<T: BinaryViewType> {
32 views: BufferMut<BinaryView>,
34 validity: MaskMut,
36
37 buffers: Vec<ByteBuffer>,
39 open_buffer: Option<ByteBufferMut>,
41
42 _marker: std::marker::PhantomData<T>,
44}
45
46impl<T: BinaryViewType> BinaryViewVectorMut<T> {
47 pub fn new(views: BufferMut<BinaryView>, buffers: Vec<ByteBuffer>, validity: MaskMut) -> Self {
54 Self::try_new(views, buffers, validity)
55 .vortex_expect("Failed to create `BinaryViewVectorMut`")
56 }
57
58 pub fn with_capacity(capacity: usize) -> Self {
62 Self::new(
63 BufferMut::with_capacity(capacity),
64 Vec::new(),
65 MaskMut::with_capacity(capacity),
66 )
67 }
68
69 pub fn try_new(
77 views: BufferMut<BinaryView>,
78 buffers: Vec<ByteBuffer>,
79 validity: MaskMut,
80 ) -> VortexResult<Self> {
81 vortex_ensure!(
82 views.len() == validity.len(),
83 "views buffer length {} != validity length {}",
84 views.len(),
85 validity.len()
86 );
87
88 validate_views(&views, &buffers, |index| validity.value(index), T::validate)?;
89
90 Ok(Self {
91 views,
92 buffers,
93 validity,
94 open_buffer: None,
95 _marker: std::marker::PhantomData,
96 })
97 }
98
99 pub unsafe fn new_unchecked(
105 views: BufferMut<BinaryView>,
106 validity: MaskMut,
107 buffers: Vec<ByteBuffer>,
108 ) -> Self {
109 if cfg!(debug_assertions) {
110 Self::new(views, buffers, validity)
111 } else {
112 Self {
113 views,
114 buffers,
115 validity,
116 open_buffer: None,
117 _marker: std::marker::PhantomData,
118 }
119 }
120 }
121
122 pub unsafe fn views_mut(&mut self) -> &mut BufferMut<BinaryView> {
129 &mut self.views
130 }
131
132 pub unsafe fn validity_mut(&mut self) -> &mut MaskMut {
139 &mut self.validity
140 }
141
142 pub fn buffers(&mut self) -> &mut Vec<ByteBuffer> {
144 &mut self.buffers
145 }
146
147 pub fn append_values(&mut self, value: &T::Slice, n: usize) {
165 let bytes = value.as_ref();
166 if bytes.len() <= BinaryView::MAX_INLINED_SIZE {
167 self.views.push_n(BinaryView::new_inlined(bytes), n);
168 } else {
169 let buffer_index =
170 u32::try_from(self.buffers.len()).vortex_expect("buffer count exceeds u32::MAX");
171
172 let buf = self
173 .open_buffer
174 .get_or_insert_with(|| ByteBufferMut::with_capacity(BUFFER_CAPACITY));
175 let offset = u32::try_from(buf.len()).vortex_expect("buffer length exceeds u32::MAX");
176 buf.extend_from_slice(value.as_ref());
177
178 self.views
179 .push_n(BinaryView::make_view(bytes, buffer_index, offset), n);
180 }
181
182 self.validity.append_n(true, n);
183 }
184
185 pub fn append_owned_values(&mut self, value: T::Scalar, n: usize) {
189 let buffer: ByteBuffer = value.into();
190
191 if buffer.len() <= BinaryView::MAX_INLINED_SIZE {
192 self.views
193 .push_n(BinaryView::new_inlined(buffer.as_ref()), n);
194 } else {
195 self.flush_open_buffer();
196
197 let buffer_index = u32::try_from(self.buffers.len())
198 .vortex_expect("buffer count exceeds u32::MAX")
199 + 1;
200 self.views
201 .push_n(BinaryView::make_view(buffer.as_ref(), buffer_index, 0), n);
202 self.buffers.push(buffer);
203 }
204
205 self.validity.append_n(true, n);
206 }
207
208 fn flush_open_buffer(&mut self) {
209 if let Some(open) = self.open_buffer.take() {
210 self.buffers.push(open.freeze());
211 }
212 }
213}
214
215impl<T: BinaryViewType> VectorMutOps for BinaryViewVectorMut<T> {
216 type Immutable = BinaryViewVector<T>;
217
218 fn len(&self) -> usize {
219 self.views.len()
220 }
221
222 fn validity(&self) -> &MaskMut {
223 &self.validity
224 }
225
226 fn capacity(&self) -> usize {
227 self.views.capacity()
228 }
229
230 fn reserve(&mut self, additional: usize) {
231 self.views.reserve(additional);
232 self.validity.reserve(additional);
233 }
234
235 fn clear(&mut self) {
236 self.views.clear();
237 self.validity.clear();
238 self.buffers.clear();
239 self.open_buffer = None;
240 }
241
242 fn truncate(&mut self, len: usize) {
243 self.views.truncate(len);
244 self.validity.truncate(len);
245 }
246
247 fn extend_from_vector(&mut self, other: &BinaryViewVector<T>) {
248 self.flush_open_buffer();
250
251 let offset =
252 u32::try_from(self.buffers.len()).vortex_expect("buffer count exceeds u32::MAX");
253
254 self.buffers.extend(other.buffers().iter().cloned());
255
256 let new_views_iter = other.views().iter().copied().map(|mut v| {
257 if v.is_inlined() {
258 v
259 } else {
260 v.as_view_mut().buffer_index += offset;
261 v
262 }
263 });
264 self.views.extend(new_views_iter);
265
266 self.validity.append_mask(other.validity())
267 }
268
269 fn append_nulls(&mut self, n: usize) {
270 self.views.push_n(BinaryView::empty_view(), n);
271 self.validity.append_n(false, n);
272 }
273
274 fn append_zeros(&mut self, n: usize) {
275 self.views.push_n(BinaryView::empty_view(), n);
276 self.validity.append_n(true, n);
277 }
278
279 fn append_scalars(&mut self, scalar: &BinaryViewScalar<T>, n: usize) {
280 match scalar.value() {
281 None => self.append_nulls(n),
282 Some(v) => {
283 self.append_owned_values(v.clone(), n);
284 }
285 }
286 }
287
288 fn freeze(mut self) -> BinaryViewVector<T> {
289 self.flush_open_buffer();
291
292 unsafe {
293 BinaryViewVector::new_unchecked(
294 self.views.freeze(),
295 Arc::new(self.buffers.into_boxed_slice()),
296 self.validity.freeze(),
297 )
298 }
299 }
300
301 fn split_off(&mut self, _at: usize) -> Self {
302 todo!()
303 }
304
305 fn unsplit(&mut self, other: Self) {
306 if self.is_empty() {
307 *self = other;
308 return;
309 }
310
311 todo!()
312 }
313}
314
#[cfg(test)]
mod tests {
    use std::ops::Deref;
    use std::sync::Arc;

    use vortex_buffer::ByteBuffer;
    use vortex_buffer::buffer;
    use vortex_buffer::buffer_mut;
    use vortex_mask::Mask;
    use vortex_mask::MaskMut;

    use crate::VectorMutOps;
    use crate::VectorOps;
    use crate::binaryview::StringVector;
    use crate::binaryview::StringVectorMut;
    use crate::binaryview::view::BinaryView;

    /// Builds a vector mixing inlined and buffer-backed strings and reads every entry back.
    #[test]
    fn test_basic() {
        let views = buffer_mut![
            BinaryView::new_inlined(b"inlined1"),
            BinaryView::make_view(b"long string 1", 0, 0),
            BinaryView::new_inlined(b"inlined2"),
            BinaryView::make_view(b"long string 2", 0, 13),
            BinaryView::new_inlined(b"inlined3"),
            BinaryView::make_view(b"long string 3", 0, 26),
        ];
        let data = vec![ByteBuffer::copy_from(
            "long string 1long string 2long string 3",
        )];

        let strings = StringVectorMut::new(views, data, MaskMut::new_true(6)).freeze();

        let expected = [
            "inlined1",
            "long string 1",
            "inlined2",
            "long string 2",
            "inlined3",
            "long string 3",
        ];
        for (idx, want) in expected.into_iter().enumerate() {
            assert_eq!(strings.get_ref(idx), Some(want));
        }
    }

    /// Extends a vector with another one that shares one of its data buffers and checks that
    /// the appended view is re-pointed at the newly appended copy of that buffer.
    #[test]
    fn test_extend_self_reference() {
        let buf0 = ByteBuffer::copy_from(
            b"a really very quite long string 1a really very quite long string 2",
        );
        let buf1 = ByteBuffer::copy_from(
            b"a really very quite long string 3a really very quite long string 4",
        );

        let mut builder = StringVectorMut::new(
            buffer_mut![
                BinaryView::new_inlined(b"inlined0"),
                BinaryView::new_inlined(b"inlined1"),
                BinaryView::make_view(b"a really very quite long string 4", 1, 33),
                BinaryView::make_view(b"a really very quite long string 3", 1, 0),
                BinaryView::make_view(b"a really very quite long string 2", 0, 33),
                BinaryView::make_view(b"a really very quite long string 1", 0, 0),
            ],
            vec![buf0.clone(), buf1.clone()],
            MaskMut::new_true(6),
        );

        // NOTE(review): the literal handed to `make_view` reads "...string 2", yet offset 33 of
        // `buf1` holds "...string 4", which is what the assertion on index 6 expects.
        // Presumably `make_view` only records length and prefix — confirm.
        let extension = StringVector::new(
            buffer![BinaryView::make_view(
                b"a really very quite long string 2",
                0,
                33
            )],
            Arc::new(Box::new([buf1.clone()])),
            Mask::new_true(1),
        );

        builder.extend_from_vector(&extension);

        let frozen = builder.freeze();
        assert!(frozen.validity().all_true());

        let expected = [
            "inlined0",
            "inlined1",
            "a really very quite long string 4",
            "a really very quite long string 3",
            "a really very quite long string 2",
            "a really very quite long string 1",
            "a really very quite long string 4",
        ];
        for (idx, want) in expected.into_iter().enumerate() {
            assert_eq!(frozen.get_ref(idx).unwrap(), want);
        }

        assert_eq!(
            frozen.buffers().deref().as_ref(),
            &[buf0, buf1.clone(), buf1]
        );
    }

    /// Extends a partially-null vector with another partially-null vector and checks that
    /// validity is carried over entry by entry.
    #[test]
    fn test_extend_nulls() {
        let mut base_validity = MaskMut::with_capacity(4);
        base_validity.append_n(false, 2);
        base_validity.append_n(true, 2);

        let mut builder = StringVectorMut::new(
            buffer_mut![
                BinaryView::empty_view(),
                BinaryView::empty_view(),
                BinaryView::new_inlined(b"nonnull1"),
                BinaryView::new_inlined(b"nonnull2"),
            ],
            vec![ByteBuffer::empty()],
            base_validity,
        );

        let extension = StringVector::new(
            buffer![
                BinaryView::new_inlined(b"extend1"),
                BinaryView::empty_view(),
                BinaryView::new_inlined(b"extend2"),
            ],
            Arc::new(Box::new([ByteBuffer::empty()])),
            Mask::from_iter([true, false, true]),
        );

        builder.extend_from_vector(&extension);
        let frozen = builder.freeze();

        let expected = [
            None,
            None,
            Some("nonnull1"),
            Some("nonnull2"),
            Some("extend1"),
            None,
            Some("extend2"),
        ];
        for (idx, want) in expected.into_iter().enumerate() {
            assert_eq!(frozen.get_ref(idx), want);
        }
    }
}