vortex_vector/
vector_ops.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Definition and implementation of [`VectorOps`] and [`VectorMutOps`] for [`Vector`] and
5//! [`VectorMut`], respectively.
6
7use std::fmt::Debug;
8use std::ops::RangeBounds;
9
10use vortex_mask::{Mask, MaskMut};
11
12use crate::{Scalar, Vector, VectorMut, private};
13
14/// Common operations for immutable vectors (all the variants of [`Vector`]).
15pub trait VectorOps: private::Sealed + Into<Vector> + Sized {
16    /// The mutable equivalent of this immutable vector.
17    type Mutable: VectorMutOps<Immutable = Self>;
18
19    /// Returns the number of elements in the vector, also referred to as its "length".
20    fn len(&self) -> usize;
21
22    /// Returns `true` if the vector contains no elements.
23    fn is_empty(&self) -> bool {
24        self.len() == 0
25    }
26
27    /// Returns the validity mask of the vector, where `true` represents a _valid_ element and
28    /// `false` represents a `null` element.
29    ///
30    /// Note that vectors are **always** considered nullable. "Non-nullable" data will simply have a
31    /// [`Mask`] of [`AllTrue(len)`](Mask::AllTrue). It is on the caller to ensure that they do not
32    /// add nullable data to a vector they want to keep as non-nullable.
33    fn validity(&self) -> &Mask;
34
35    /// Returns the null count of the vector.
36    fn null_count(&self) -> usize {
37        self.validity().false_count()
38    }
39
40    /// Return the scalar at the given index.
41    ///
42    /// # Panics
43    ///
44    /// Panics if the index is out of bounds.
45    fn scalar_at(&self, index: usize) -> Scalar;
46
47    /// Slice the vector from `start` to `end` (exclusive).
48    fn slice(&self, range: impl RangeBounds<usize> + Clone + Debug) -> Self;
49
50    /// Tries to convert `self` into a mutable vector (implementing [`VectorMutOps`]).
51    ///
52    /// This method will only succeed if `self` is the only unique strong reference (it effectively
53    /// "owns" the buffer). If this is true, this method will return a mutable vector with the
54    /// contents of `self` **without** any copying of data.
55    ///
56    /// # Errors
57    ///
58    /// If `self` is not unique, this will fail and return `self` back to the caller.
59    fn try_into_mut(self) -> Result<Self::Mutable, Self>;
60
61    /// Converts `self` into a mutable vector (implementing [`VectorMutOps`]).
62    ///
63    /// This method uses "clone-on-write" semantics, meaning it will clone any underlying data that
64    /// has multiple references (preventing mutable access). `into_mut` can be more efficient than
65    /// [`try_into_mut()`] when mutations are infrequent.
66    ///
67    /// The semantics of `into_mut` are somewhat similar to that of [`Arc::make_mut()`], but instead
68    /// of working with references, this works with owned immutable / mutable types.
69    ///
70    /// [`try_into_mut()`]: Self::try_into_mut
71    /// [`Arc::make_mut()`]: std::sync::Arc::make_mut
72    fn into_mut(self) -> Self::Mutable;
73}
74
75/// Common operations for mutable vectors (all the variants of [`VectorMut`]).
76pub trait VectorMutOps: private::Sealed + Into<VectorMut> + Sized {
77    /// The immutable equivalent of this mutable vector.
78    type Immutable: VectorOps<Mutable = Self>;
79
80    /// Returns the number of elements in the vector, also referred to as its "length".
81    fn len(&self) -> usize;
82
83    /// Returns `true` if the vector contains no elements.
84    fn is_empty(&self) -> bool {
85        self.len() == 0
86    }
87
88    /// Returns the validity mask of the vector, where `true` represents a _valid_ element and
89    /// `false` represents a `null` element.
90    ///
91    /// Note that while this returns a [`MaskMut`] (which is typically an owned type), the caller is
92    /// only allowed to inspect it via the shared reference.
93    fn validity(&self) -> &MaskMut;
94
95    /// Returns the total number of elements the vector can hold without reallocating.
96    fn capacity(&self) -> usize;
97
98    /// Reserves capacity for at least `additional` more elements to be inserted in the given
99    /// vector.
100    ///
101    /// The collection may reserve more space to speculatively avoid frequent reallocations. After
102    /// calling `reserve`, the capacity will be greater than or equal to `self.len() + additional`.
103    /// Does nothing if capacity is already sufficient.
104    ///
105    /// Please let us know if you need `reserve_exact` functionality!
106    fn reserve(&mut self, additional: usize);
107
108    /// Clears the buffer, removing all data. Existing capacity is preserved.
109    fn clear(&mut self);
110
111    /// Shortens the buffer, keeping the first len bytes and dropping the rest.
112    ///
113    /// If len is greater than the buffer’s current length, this has no effect.
114    ///
115    /// Existing underlying capacity is preserved.
116    fn truncate(&mut self, len: usize);
117
118    /// Extends the vector by appending elements from another vector.
119    ///
120    /// # Panics
121    ///
122    /// Panics if the `other` vector has the wrong type (for example, a
123    /// [`StructVector`](crate::struct_::StructVector) might have incorrect fields).
124    fn extend_from_vector(&mut self, other: &Self::Immutable);
125
126    /// Appends `n` null elements to the vector.
127    ///
128    /// Implementors should ensure that they correctly append "null" or garbage values to their
129    /// elements in addition to adding nulls to their validity mask.
130    fn append_nulls(&mut self, n: usize);
131
132    /// Converts `self` into an immutable vector.
133    fn freeze(self) -> Self::Immutable;
134
135    /// Splits the vector into two at the given index.
136    ///
137    /// Afterward, `self` contains elements `[0, at)`, and the returned vector contains elements
138    /// `[at, capacity)`. It's guaranteed that the memory does not move, that is, the address of
139    /// `self` does not change, and the address of the returned slice is at bytes after that.
140    ///
141    /// This is an `O(1)` operation that just increases the reference count and sets a few indices.
142    ///
143    /// # Panics
144    ///
145    /// Panics if we try to split off more than the current capacity of the vector (if
146    /// `at > capacity`).
147    fn split_off(&mut self, at: usize) -> Self;
148
149    /// Absorbs a mutable vector that was previously split off.
150    ///
151    /// If the two vectors were previously contiguous and not mutated in a way that causes
152    /// re-allocation i.e., if other was created by calling [`split_off()`] on this vector, then
153    /// this is an `O(1)` operation (simply decreases a reference count and sets a few indices).
154    ///
155    /// Otherwise, this method falls back to `self.extend_from_vector(other)`.
156    ///
157    /// [`split_off()`]: Self::split_off
158    fn unsplit(&mut self, other: Self);
159}
160
161/// Converts a range bounds into a length, given the total length of the vector.
162pub(crate) fn range_bounds_to_len(bounds: impl RangeBounds<usize> + Debug, len: usize) -> usize {
163    use std::ops::Bound;
164
165    let start = match bounds.start_bound() {
166        Bound::Included(&s) => s,
167        Bound::Excluded(&s) => s + 1,
168        Bound::Unbounded => 0,
169    };
170
171    let end = match bounds.end_bound() {
172        Bound::Included(&e) => e + 1,
173        Bound::Excluded(&e) => e,
174        Bound::Unbounded => len,
175    };
176
177    assert!(
178        start <= end && end <= len,
179        "Range {:?} out of bounds for length {}",
180        bounds,
181        len
182    );
183
184    end - start
185}