vortex_vector/binaryview/
vector_mut.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Mutable variable-length binary vector.
5
6use std::sync::Arc;
7
8use vortex_buffer::BufferMut;
9use vortex_buffer::ByteBuffer;
10use vortex_buffer::ByteBufferMut;
11use vortex_error::VortexExpect;
12use vortex_error::VortexResult;
13use vortex_error::vortex_ensure;
14use vortex_mask::MaskMut;
15
16use crate::VectorMutOps;
17use crate::VectorOps;
18use crate::binaryview::BinaryViewScalar;
19use crate::binaryview::BinaryViewType;
20use crate::binaryview::vector::BinaryViewVector;
21use crate::binaryview::view::BinaryView;
22use crate::binaryview::view::validate_views;
23
/// Initial capacity, in bytes, of each newly opened string data buffer (2 MiB).
const BUFFER_CAPACITY: usize = 2 << 20;
26
27/// A mutable vector of binary view data.
28///
29/// The immutable equivalent of this type is [`BinaryViewVector`].
30#[derive(Clone, Debug)]
31pub struct BinaryViewVectorMut<T: BinaryViewType> {
32    /// Views into the binary data.
33    views: BufferMut<BinaryView>,
34    /// Validity mask for the vector.
35    validity: MaskMut,
36
37    /// The completed buffers holding referenced binary data.
38    buffers: Vec<ByteBuffer>,
39    /// The current buffer being appended to, if any.
40    open_buffer: Option<ByteBufferMut>,
41
42    /// Marker trait for the [`BinaryViewType`].
43    _marker: std::marker::PhantomData<T>,
44}
45
46impl<T: BinaryViewType> BinaryViewVectorMut<T> {
47    /// Create a new [`BinaryViewVectorMut`] from its components, panicking if validation fails.
48    ///
49    /// # Errors
50    ///
51    /// This function will panic if any of the validation checks performed by [`try_new`][Self::try_new]
52    /// fails.
53    pub fn new(views: BufferMut<BinaryView>, buffers: Vec<ByteBuffer>, validity: MaskMut) -> Self {
54        Self::try_new(views, buffers, validity)
55            .vortex_expect("Failed to create `BinaryViewVectorMut`")
56    }
57
58    /// Create a new empty [`BinaryViewVectorMut`], pre-allocated to hold the specified number
59    /// of items. This does not reserve any memory for string data itself, only for the binary views
60    /// and the validity bits.
61    pub fn with_capacity(capacity: usize) -> Self {
62        Self::new(
63            BufferMut::with_capacity(capacity),
64            Vec::new(),
65            MaskMut::with_capacity(capacity),
66        )
67    }
68
69    /// Tries to create a new [`BinaryViewVectorMut`] from its components.
70    ///
71    /// # Errors
72    ///
73    /// Returns an error if the length of the validity mask does not match the length of the views.
74    ///
75    /// Returns an error if the views reference any data that is not a valid buffer
76    pub fn try_new(
77        views: BufferMut<BinaryView>,
78        buffers: Vec<ByteBuffer>,
79        validity: MaskMut,
80    ) -> VortexResult<Self> {
81        vortex_ensure!(
82            views.len() == validity.len(),
83            "views buffer length {} != validity length {}",
84            views.len(),
85            validity.len()
86        );
87
88        validate_views(&views, &buffers, |index| validity.value(index), T::validate)?;
89
90        Ok(Self {
91            views,
92            buffers,
93            validity,
94            open_buffer: None,
95            _marker: std::marker::PhantomData,
96        })
97    }
98
99    /// Creates a new [`BinaryViewVectorMut`] from the given bits and validity mask without validation.
100    ///
101    /// # Safety
102    ///
103    /// The caller must ensure that the validity mask has the same length as the views.
104    pub unsafe fn new_unchecked(
105        views: BufferMut<BinaryView>,
106        validity: MaskMut,
107        buffers: Vec<ByteBuffer>,
108    ) -> Self {
109        if cfg!(debug_assertions) {
110            Self::new(views, buffers, validity)
111        } else {
112            Self {
113                views,
114                buffers,
115                validity,
116                open_buffer: None,
117                _marker: std::marker::PhantomData,
118            }
119        }
120    }
121
122    /// Get a mutable handle to the buffer holding the [views][BinaryView] of the vector.
123    ///
124    /// # Safety
125    ///
126    /// Caller must make sure that length of the views always matches
127    /// length of the validity mask.
128    pub unsafe fn views_mut(&mut self) -> &mut BufferMut<BinaryView> {
129        &mut self.views
130    }
131
132    /// Get a mutable handle to the validity mask of the vector.
133    ///
134    /// # Safety
135    ///
136    /// Caller must make sure that the length of the validity mask
137    /// always matches the length of the views
138    pub unsafe fn validity_mut(&mut self) -> &mut MaskMut {
139        &mut self.validity
140    }
141
142    /// Get a mutable handle to the vector of buffers backing the string data of the vector.
143    pub fn buffers(&mut self) -> &mut Vec<ByteBuffer> {
144        &mut self.buffers
145    }
146
147    /// Append a repeated sequence of binary data to a vector.
148    ///
149    /// ```
150    /// # use vortex_vector::binaryview::StringVectorMut;
151    /// # use vortex_vector::VectorMutOps;
152    /// let mut strings = StringVectorMut::with_capacity(4);
153    /// strings.append_values("inlined", 2);
154    /// strings.append_nulls(1);
155    /// strings.append_values("large not inlined", 1);
156    ///
157    /// let strings = strings.freeze();
158    ///
159    /// assert_eq!(
160    ///     [strings.get_ref(0), strings.get_ref(1), strings.get_ref(2), strings.get_ref(3)],
161    ///     [Some("inlined"), Some("inlined"), None, Some("large not inlined")],
162    /// );
163    /// ```
164    pub fn append_values(&mut self, value: &T::Slice, n: usize) {
165        let bytes = value.as_ref();
166        if bytes.len() <= BinaryView::MAX_INLINED_SIZE {
167            self.views.push_n(BinaryView::new_inlined(bytes), n);
168        } else {
169            let buffer_index =
170                u32::try_from(self.buffers.len()).vortex_expect("buffer count exceeds u32::MAX");
171
172            let buf = self
173                .open_buffer
174                .get_or_insert_with(|| ByteBufferMut::with_capacity(BUFFER_CAPACITY));
175            let offset = u32::try_from(buf.len()).vortex_expect("buffer length exceeds u32::MAX");
176            buf.extend_from_slice(value.as_ref());
177
178            self.views
179                .push_n(BinaryView::make_view(bytes, buffer_index, offset), n);
180        }
181
182        self.validity.append_n(true, n);
183    }
184
185    /// Append a repeated sequence of binary data to a vector, from an owned buffer.
186    ///
187    /// The buffer will be used directly if possible, avoiding a copy.
188    pub fn append_owned_values(&mut self, value: T::Scalar, n: usize) {
189        let buffer: ByteBuffer = value.into();
190
191        if buffer.len() <= BinaryView::MAX_INLINED_SIZE {
192            self.views
193                .push_n(BinaryView::new_inlined(buffer.as_ref()), n);
194        } else {
195            self.flush_open_buffer();
196
197            let buffer_index = u32::try_from(self.buffers.len())
198                .vortex_expect("buffer count exceeds u32::MAX")
199                + 1;
200            self.views
201                .push_n(BinaryView::make_view(buffer.as_ref(), buffer_index, 0), n);
202            self.buffers.push(buffer);
203        }
204
205        self.validity.append_n(true, n);
206    }
207
208    fn flush_open_buffer(&mut self) {
209        if let Some(open) = self.open_buffer.take() {
210            self.buffers.push(open.freeze());
211        }
212    }
213}
214
215impl<T: BinaryViewType> VectorMutOps for BinaryViewVectorMut<T> {
216    type Immutable = BinaryViewVector<T>;
217
218    fn len(&self) -> usize {
219        self.views.len()
220    }
221
222    fn validity(&self) -> &MaskMut {
223        &self.validity
224    }
225
226    fn capacity(&self) -> usize {
227        self.views.capacity()
228    }
229
230    fn reserve(&mut self, additional: usize) {
231        self.views.reserve(additional);
232        self.validity.reserve(additional);
233    }
234
235    fn clear(&mut self) {
236        self.views.clear();
237        self.validity.clear();
238        self.buffers.clear();
239        self.open_buffer = None;
240    }
241
242    fn truncate(&mut self, len: usize) {
243        self.views.truncate(len);
244        self.validity.truncate(len);
245    }
246
247    fn extend_from_vector(&mut self, other: &BinaryViewVector<T>) {
248        // Close any existing views into a new buffer
249        self.flush_open_buffer();
250
251        let offset =
252            u32::try_from(self.buffers.len()).vortex_expect("buffer count exceeds u32::MAX");
253
254        self.buffers.extend(other.buffers().iter().cloned());
255
256        let new_views_iter = other.views().iter().copied().map(|mut v| {
257            if v.is_inlined() {
258                v
259            } else {
260                v.as_view_mut().buffer_index += offset;
261                v
262            }
263        });
264        self.views.extend(new_views_iter);
265
266        self.validity.append_mask(other.validity())
267    }
268
269    fn append_nulls(&mut self, n: usize) {
270        self.views.push_n(BinaryView::empty_view(), n);
271        self.validity.append_n(false, n);
272    }
273
274    fn append_zeros(&mut self, n: usize) {
275        self.views.push_n(BinaryView::empty_view(), n);
276        self.validity.append_n(true, n);
277    }
278
279    fn append_scalars(&mut self, scalar: &BinaryViewScalar<T>, n: usize) {
280        match scalar.value() {
281            None => self.append_nulls(n),
282            Some(v) => {
283                self.append_owned_values(v.clone(), n);
284            }
285        }
286    }
287
288    fn freeze(mut self) -> BinaryViewVector<T> {
289        // Freeze all components, close any in-progress views
290        self.flush_open_buffer();
291
292        unsafe {
293            BinaryViewVector::new_unchecked(
294                self.views.freeze(),
295                Arc::new(self.buffers.into_boxed_slice()),
296                self.validity.freeze(),
297            )
298        }
299    }
300
301    fn split_off(&mut self, _at: usize) -> Self {
302        todo!()
303    }
304
305    fn unsplit(&mut self, other: Self) {
306        if self.is_empty() {
307            *self = other;
308            return;
309        }
310
311        todo!()
312    }
313}
314
#[cfg(test)]
mod tests {
    use std::ops::Deref;
    use std::sync::Arc;

    use vortex_buffer::ByteBuffer;
    use vortex_buffer::buffer;
    use vortex_buffer::buffer_mut;
    use vortex_mask::Mask;
    use vortex_mask::MaskMut;

    use crate::VectorMutOps;
    use crate::VectorOps;
    use crate::binaryview::StringVector;
    use crate::binaryview::StringVectorMut;
    use crate::binaryview::view::BinaryView;

    /// Inlined and buffer-backed views can be read back after freezing.
    #[test]
    fn test_basic() {
        let strings_mut = StringVectorMut::new(
            buffer_mut![
                BinaryView::new_inlined(b"inlined1"),
                BinaryView::make_view(b"long string 1", 0, 0),
                BinaryView::new_inlined(b"inlined2"),
                BinaryView::make_view(b"long string 2", 0, 13),
                BinaryView::new_inlined(b"inlined3"),
                BinaryView::make_view(b"long string 3", 0, 26),
            ],
            vec![ByteBuffer::copy_from(
                "long string 1long string 2long string 3",
            )],
            MaskMut::new_true(6),
        );

        let strings = strings_mut.freeze();
        assert_eq!(strings.get_ref(0), Some("inlined1"));
        assert_eq!(strings.get_ref(1), Some("long string 1"));
        assert_eq!(strings.get_ref(2), Some("inlined2"));
        assert_eq!(strings.get_ref(3), Some("long string 2"));
        assert_eq!(strings.get_ref(4), Some("inlined3"));
        assert_eq!(strings.get_ref(5), Some("long string 3"));
    }

    /// Extending from a vector whose buffer is already present in the target appends that buffer
    /// again and shifts the buffer indices of the incoming views accordingly.
    #[test]
    fn test_extend_self_reference() {
        let buf0 = ByteBuffer::copy_from(
            b"a really very quite long string 1a really very quite long string 2",
        );
        let buf1 = ByteBuffer::copy_from(
            b"a really very quite long string 3a really very quite long string 4",
        );

        let mut strings_mut = StringVectorMut::new(
            buffer_mut![
                BinaryView::new_inlined(b"inlined0"),
                BinaryView::new_inlined(b"inlined1"),
                BinaryView::make_view(b"a really very quite long string 4", 1, 33),
                BinaryView::make_view(b"a really very quite long string 3", 1, 0),
                BinaryView::make_view(b"a really very quite long string 2", 0, 33),
                BinaryView::make_view(b"a really very quite long string 1", 0, 0),
            ],
            vec![buf0.clone(), buf1.clone()],
            MaskMut::new_true(6),
        );

        // The `StringVector` we extend from. Its only buffer is `buf1`, so buffer 0 at offset 33
        // holds "... string 4".
        let strings = StringVector::new(
            buffer![BinaryView::make_view(
                b"a really very quite long string 4",
                0,
                33
            )],
            Arc::new(Box::new([buf1.clone()])),
            Mask::new_true(1),
        );

        strings_mut.extend_from_vector(&strings);

        let strings_finished = strings_mut.freeze();
        assert!(strings_finished.validity().all_true());

        assert_eq!(strings_finished.get_ref(0).unwrap(), "inlined0");
        assert_eq!(strings_finished.get_ref(1).unwrap(), "inlined1");
        assert_eq!(
            strings_finished.get_ref(2).unwrap(),
            "a really very quite long string 4"
        );
        assert_eq!(
            strings_finished.get_ref(3).unwrap(),
            "a really very quite long string 3"
        );
        assert_eq!(
            strings_finished.get_ref(4).unwrap(),
            "a really very quite long string 2",
        );
        assert_eq!(
            strings_finished.get_ref(5).unwrap(),
            "a really very quite long string 1"
        );
        assert_eq!(
            strings_finished.get_ref(6).unwrap(),
            "a really very quite long string 4"
        );

        // The buffer of the source vector is appended after the two existing buffers.
        assert_eq!(
            strings_finished.buffers().deref().as_ref(),
            &[buf0, buf1.clone(), buf1]
        );
    }

    /// Null entries on both sides are preserved when extending.
    #[test]
    fn test_extend_nulls() {
        // Extend multiple times, with nulls.
        let mut mask1 = MaskMut::with_capacity(4);
        mask1.append_n(false, 2);
        mask1.append_n(true, 2);

        let mut strings_mut = StringVectorMut::new(
            buffer_mut![
                BinaryView::empty_view(),
                BinaryView::empty_view(),
                BinaryView::new_inlined(b"nonnull1"),
                BinaryView::new_inlined(b"nonnull2"),
            ],
            vec![ByteBuffer::empty()],
            mask1,
        );

        let strings = StringVector::new(
            buffer![
                BinaryView::new_inlined(b"extend1"),
                BinaryView::empty_view(),
                BinaryView::new_inlined(b"extend2"),
            ],
            Arc::new(Box::new([ByteBuffer::empty()])),
            Mask::from_iter([true, false, true]),
        );

        strings_mut.extend_from_vector(&strings);
        let strings_finished = strings_mut.freeze();

        assert_eq!(strings_finished.get_ref(0), None);
        assert_eq!(strings_finished.get_ref(1), None);
        assert_eq!(strings_finished.get_ref(2), Some("nonnull1"));
        assert_eq!(strings_finished.get_ref(3), Some("nonnull2"));
        assert_eq!(strings_finished.get_ref(4), Some("extend1"));
        assert_eq!(strings_finished.get_ref(5), None);
        assert_eq!(strings_finished.get_ref(6), Some("extend2"));
    }
}