vortex_vector/vector_ops.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Definition and implementation of [`VectorOps`] and [`VectorMutOps`] for [`Vector`] and
5//! [`VectorMut`], respectively.
6
7use std::fmt::Debug;
8use std::ops::RangeBounds;
9
10use vortex_mask::{Mask, MaskMut};
11
12use crate::{Scalar, Vector, VectorMut, private};
13
14/// Common operations for immutable vectors (all the variants of [`Vector`]).
15pub trait VectorOps: private::Sealed + Into<Vector> + Sized {
16 /// The mutable equivalent of this immutable vector.
17 type Mutable: VectorMutOps<Immutable = Self>;
18
19 /// Returns the number of elements in the vector, also referred to as its "length".
20 fn len(&self) -> usize;
21
22 /// Returns `true` if the vector contains no elements.
23 fn is_empty(&self) -> bool {
24 self.len() == 0
25 }
26
27 /// Returns the validity mask of the vector, where `true` represents a _valid_ element and
28 /// `false` represents a `null` element.
29 ///
30 /// Note that vectors are **always** considered nullable. "Non-nullable" data will simply have a
31 /// [`Mask`] of [`AllTrue(len)`](Mask::AllTrue). It is on the caller to ensure that they do not
32 /// add nullable data to a vector they want to keep as non-nullable.
33 fn validity(&self) -> &Mask;
34
35 /// Returns the null count of the vector.
36 fn null_count(&self) -> usize {
37 self.validity().false_count()
38 }
39
40 /// Return the scalar at the given index.
41 ///
42 /// # Panics
43 ///
44 /// Panics if the index is out of bounds.
45 fn scalar_at(&self, index: usize) -> Scalar;
46
47 /// Slice the vector from `start` to `end` (exclusive).
48 fn slice(&self, range: impl RangeBounds<usize> + Clone + Debug) -> Self;
49
50 /// Tries to convert `self` into a mutable vector (implementing [`VectorMutOps`]).
51 ///
52 /// This method will only succeed if `self` is the only unique strong reference (it effectively
53 /// "owns" the buffer). If this is true, this method will return a mutable vector with the
54 /// contents of `self` **without** any copying of data.
55 ///
56 /// # Errors
57 ///
58 /// If `self` is not unique, this will fail and return `self` back to the caller.
59 fn try_into_mut(self) -> Result<Self::Mutable, Self>;
60
61 /// Converts `self` into a mutable vector (implementing [`VectorMutOps`]).
62 ///
63 /// This method uses "clone-on-write" semantics, meaning it will clone any underlying data that
64 /// has multiple references (preventing mutable access). `into_mut` can be more efficient than
65 /// [`try_into_mut()`] when mutations are infrequent.
66 ///
67 /// The semantics of `into_mut` are somewhat similar to that of [`Arc::make_mut()`], but instead
68 /// of working with references, this works with owned immutable / mutable types.
69 ///
70 /// [`try_into_mut()`]: Self::try_into_mut
71 /// [`Arc::make_mut()`]: std::sync::Arc::make_mut
72 fn into_mut(self) -> Self::Mutable;
73}
74
75/// Common operations for mutable vectors (all the variants of [`VectorMut`]).
76pub trait VectorMutOps: private::Sealed + Into<VectorMut> + Sized {
77 /// The immutable equivalent of this mutable vector.
78 type Immutable: VectorOps<Mutable = Self>;
79
80 /// Returns the number of elements in the vector, also referred to as its "length".
81 fn len(&self) -> usize;
82
83 /// Returns `true` if the vector contains no elements.
84 fn is_empty(&self) -> bool {
85 self.len() == 0
86 }
87
88 /// Returns the validity mask of the vector, where `true` represents a _valid_ element and
89 /// `false` represents a `null` element.
90 ///
91 /// Note that while this returns a [`MaskMut`] (which is typically an owned type), the caller is
92 /// only allowed to inspect it via the shared reference.
93 fn validity(&self) -> &MaskMut;
94
95 /// Returns the total number of elements the vector can hold without reallocating.
96 fn capacity(&self) -> usize;
97
98 /// Reserves capacity for at least `additional` more elements to be inserted in the given
99 /// vector.
100 ///
101 /// The collection may reserve more space to speculatively avoid frequent reallocations. After
102 /// calling `reserve`, the capacity will be greater than or equal to `self.len() + additional`.
103 /// Does nothing if capacity is already sufficient.
104 ///
105 /// Please let us know if you need `reserve_exact` functionality!
106 fn reserve(&mut self, additional: usize);
107
108 /// Clears the buffer, removing all data. Existing capacity is preserved.
109 fn clear(&mut self);
110
111 /// Shortens the buffer, keeping the first len bytes and dropping the rest.
112 ///
113 /// If len is greater than the buffer’s current length, this has no effect.
114 ///
115 /// Existing underlying capacity is preserved.
116 fn truncate(&mut self, len: usize);
117
118 /// Extends the vector by appending elements from another vector.
119 ///
120 /// # Panics
121 ///
122 /// Panics if the `other` vector has the wrong type (for example, a
123 /// [`StructVector`](crate::struct_::StructVector) might have incorrect fields).
124 fn extend_from_vector(&mut self, other: &Self::Immutable);
125
126 /// Appends `n` null elements to the vector.
127 ///
128 /// Implementors should ensure that they correctly append "null" or garbage values to their
129 /// elements in addition to adding nulls to their validity mask.
130 fn append_nulls(&mut self, n: usize);
131
132 /// Converts `self` into an immutable vector.
133 fn freeze(self) -> Self::Immutable;
134
135 /// Splits the vector into two at the given index.
136 ///
137 /// Afterward, `self` contains elements `[0, at)`, and the returned vector contains elements
138 /// `[at, capacity)`. It's guaranteed that the memory does not move, that is, the address of
139 /// `self` does not change, and the address of the returned slice is at bytes after that.
140 ///
141 /// This is an `O(1)` operation that just increases the reference count and sets a few indices.
142 ///
143 /// # Panics
144 ///
145 /// Panics if we try to split off more than the current capacity of the vector (if
146 /// `at > capacity`).
147 fn split_off(&mut self, at: usize) -> Self;
148
149 /// Absorbs a mutable vector that was previously split off.
150 ///
151 /// If the two vectors were previously contiguous and not mutated in a way that causes
152 /// re-allocation i.e., if other was created by calling [`split_off()`] on this vector, then
153 /// this is an `O(1)` operation (simply decreases a reference count and sets a few indices).
154 ///
155 /// Otherwise, this method falls back to `self.extend_from_vector(other)`.
156 ///
157 /// [`split_off()`]: Self::split_off
158 fn unsplit(&mut self, other: Self);
159}
160
161/// Converts a range bounds into a length, given the total length of the vector.
162pub(crate) fn range_bounds_to_len(bounds: impl RangeBounds<usize> + Debug, len: usize) -> usize {
163 use std::ops::Bound;
164
165 let start = match bounds.start_bound() {
166 Bound::Included(&s) => s,
167 Bound::Excluded(&s) => s + 1,
168 Bound::Unbounded => 0,
169 };
170
171 let end = match bounds.end_bound() {
172 Bound::Included(&e) => e + 1,
173 Bound::Excluded(&e) => e,
174 Bound::Unbounded => len,
175 };
176
177 assert!(
178 start <= end && end <= len,
179 "Range {:?} out of bounds for length {}",
180 bounds,
181 len
182 );
183
184 end - start
185}