vortex_vector/primitive/generic.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Definition and implementation of [`PVector<T>`].
5
6use std::fmt::Debug;
7use std::ops::BitAnd;
8use std::ops::RangeBounds;
9
10use vortex_buffer::Buffer;
11use vortex_dtype::NativePType;
12use vortex_error::VortexExpect;
13use vortex_error::VortexResult;
14use vortex_error::vortex_ensure;
15use vortex_mask::Mask;
16
17use crate::VectorOps;
18use crate::primitive::PScalar;
19use crate::primitive::PVectorMut;
20
21/// An immutable vector of generic primitive values.
22///
23/// `T` is expected to be bound by [`NativePType`], which templates an internal [`Buffer<T>`] that
24/// stores the elements of the vector.
25#[derive(Default, Debug, Clone)]
26pub struct PVector<T> {
27 /// The buffer representing the vector elements.
28 pub(super) elements: Buffer<T>,
29 /// The validity mask (where `true` represents an element is **not** null).
30 pub(super) validity: Mask,
31}
32
33impl<T: NativePType + PartialEq> PartialEq for PVector<T> {
34 fn eq(&self, other: &Self) -> bool {
35 if self.len() != other.len() {
36 return false;
37 }
38 // Validity patterns must match
39 if self.validity != other.validity {
40 return false;
41 }
42 // Compare all elements, OR with !validity to ignore invalid positions
43 self.elements
44 .iter()
45 .zip(other.elements.iter())
46 .enumerate()
47 .all(|(i, (a, b))| !self.validity.value(i) | (a == b))
48 }
49}
50
51impl<T: NativePType + Eq> Eq for PVector<T> {}
52
53impl<T> PVector<T> {
54 /// Creates a new [`PVector<T>`] from the given elements buffer and validity mask.
55 ///
56 /// # Panics
57 ///
58 /// Panics if the length of the validity mask does not match the length of the elements buffer.
59 pub fn new(elements: Buffer<T>, validity: Mask) -> Self {
60 Self::try_new(elements, validity).vortex_expect("Failed to create `PVector`")
61 }
62
63 /// Tries to create a new [`PVector<T>`] from the given elements buffer and validity mask.
64 ///
65 /// # Errors
66 ///
67 /// Returns an error if the length of the validity mask does not match the length of the
68 /// elements buffer.
69 pub fn try_new(elements: Buffer<T>, validity: Mask) -> VortexResult<Self> {
70 vortex_ensure!(
71 validity.len() == elements.len(),
72 "`PVector` validity mask must have the same length as elements"
73 );
74 Ok(Self { elements, validity })
75 }
76
77 /// Creates a new [`PVector<T>`] from the given elements buffer and validity mask without
78 /// validation.
79 ///
80 /// # Safety
81 ///
82 /// The caller must ensure that the validity mask has the same length as the elements buffer.
83 pub unsafe fn new_unchecked(elements: Buffer<T>, validity: Mask) -> Self {
84 if cfg!(debug_assertions) {
85 Self::new(elements, validity)
86 } else {
87 Self { elements, validity }
88 }
89 }
90
91 /// Decomposes the primitive vector into its constituent parts (buffer and validity).
92 pub fn into_parts(self) -> (Buffer<T>, Mask) {
93 (self.elements, self.validity)
94 }
95
96 /// Decomposes the primitive vector into its internal buffer, consuming the vector.
97 ///
98 /// # Panics
99 ///
100 /// Panics if there are any null values in the vector.
101 pub fn into_nonnull_buffer(self) -> Buffer<T> {
102 assert!(
103 self.validity.all_true(),
104 "Cannot convert to buffer: vector contains null values"
105 );
106 self.elements
107 }
108
109 /// Decomposes the primitive vector into its constituent parts by mutable reference.
110 ///
111 /// # Safety
112 ///
113 /// The caller must ensure that no other references to the internal parts exist while mutable
114 pub unsafe fn as_parts_mut(&mut self) -> (&mut Buffer<T>, &mut Mask) {
115 (&mut self.elements, &mut self.validity)
116 }
117
118 /// Gets a nullable element at the given index, panicking on out-of-bounds.
119 ///
120 /// If the element at the given index is null, returns `None`. Otherwise, returns `Some(x)`,
121 /// where `x: T`.
122 ///
123 /// Note that this `get` method is different from the standard library [`slice::get`], which
124 /// returns `None` if the index is out of bounds. This method will panic if the index is out of
125 /// bounds, and return `None` if the elements is null.
126 ///
127 /// # Panics
128 ///
129 /// Panics if the index is out of bounds.
130 pub fn get(&self, index: usize) -> Option<&T> {
131 self.validity.value(index).then(|| &self.elements[index])
132 }
133
134 /// Gets an element at the given index and converts it to type `U`.
135 ///
136 /// Returns `None` if:
137 /// - The element at the given index is null
138 /// - The conversion from `T` to `U` fails
139 ///
140 /// # Panics
141 ///
142 /// Panics if the index is out of bounds.
143 pub fn get_as<U>(&self, index: usize) -> Option<U>
144 where
145 U: TryFrom<T>,
146 T: Copy,
147 {
148 self.get(index).and_then(|&v| U::try_from(v).ok())
149 }
150
151 /// Returns the internal [`Buffer`] of the [`PVector`].
152 ///
153 /// Note that the internal buffer may hold garbage data in place of nulls. That information is
154 /// tracked by the [`validity()`](Self::validity).
155 #[inline]
156 pub fn elements(&self) -> &Buffer<T> {
157 &self.elements
158 }
159
160 /// Transmute a `PVector<T>` into a `PVector<U>`.
161 ///
162 /// # Safety
163 ///
164 /// The caller must ensure that all values of type `T` in this vector are valid as type `U`.
165 /// See [`std::mem::transmute`] for more details.
166 ///
167 /// # Panics
168 ///
169 /// Panics if the type `U` does not have the same size and alignment as `T`.
170 pub unsafe fn transmute<U: NativePType>(self) -> PVector<U> {
171 let (buffer, mask) = self.into_parts();
172
173 // SAFETY: same guarantees as this function.
174 let buffer = unsafe { buffer.transmute::<U>() };
175
176 PVector::new(buffer, mask)
177 }
178}
179
180impl<T: NativePType> AsRef<[T]> for PVector<T> {
181 /// Returns an immutable slice over the internal buffer with elements of type `T`.
182 ///
183 /// Note that this slice may contain garbage data where the [`validity()`] mask states that an
184 /// element is invalid.
185 ///
186 /// The caller should check the [`validity()`] before performing any operations.
187 ///
188 /// [`validity()`]: crate::VectorOps::validity
189 #[inline]
190 fn as_ref(&self) -> &[T] {
191 self.elements.as_slice()
192 }
193}
194
195impl<T: NativePType> VectorOps for PVector<T> {
196 type Mutable = PVectorMut<T>;
197 type Scalar = PScalar<T>;
198
199 fn len(&self) -> usize {
200 self.elements.len()
201 }
202
203 fn validity(&self) -> &Mask {
204 &self.validity
205 }
206
207 fn mask_validity(&mut self, mask: &Mask) {
208 self.validity = self.validity.bitand(mask);
209 }
210
211 fn scalar_at(&self, index: usize) -> PScalar<T> {
212 assert!(index < self.len(), "Index out of bounds in `PVector`");
213 PScalar::<T>::new(self.validity.value(index).then(|| self.elements[index]))
214 }
215
216 fn slice(&self, range: impl RangeBounds<usize> + Clone + Debug) -> Self {
217 let elements = self.elements.slice(range.clone());
218 let validity = self.validity.slice(range);
219 Self::new(elements, validity)
220 }
221
222 fn clear(&mut self) {
223 self.elements.clear();
224 self.validity.clear();
225 }
226
227 /// Try to convert self into a mutable vector.
228 fn try_into_mut(self) -> Result<PVectorMut<T>, Self> {
229 let elements = match self.elements.try_into_mut() {
230 Ok(elements) => elements,
231 Err(elements) => {
232 return Err(Self {
233 elements,
234 validity: self.validity,
235 });
236 }
237 };
238
239 match self.validity.try_into_mut() {
240 Ok(validity_mut) => Ok(PVectorMut {
241 elements,
242 validity: validity_mut,
243 }),
244 Err(validity) => Err(Self {
245 elements: elements.freeze(),
246 validity,
247 }),
248 }
249 }
250
251 fn into_mut(self) -> PVectorMut<T> {
252 let elements = self.elements.into_mut();
253 let validity = self.validity.into_mut();
254
255 PVectorMut { elements, validity }
256 }
257}
258
259impl<T: NativePType> From<Buffer<T>> for PVector<T> {
260 /// Creates a new [`PVector<T>`] from the given elements buffer, with an all valid validity.
261 fn from(value: Buffer<T>) -> Self {
262 let len = value.len();
263 Self {
264 elements: value,
265 validity: Mask::new_true(len),
266 }
267 }
268}