vortex_vector/primitive/
generic.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Definition and implementation of [`PVector<T>`].
5
6use std::fmt::Debug;
7use std::ops::BitAnd;
8use std::ops::RangeBounds;
9
10use vortex_buffer::Buffer;
11use vortex_dtype::NativePType;
12use vortex_error::VortexExpect;
13use vortex_error::VortexResult;
14use vortex_error::vortex_ensure;
15use vortex_mask::Mask;
16
17use crate::VectorOps;
18use crate::primitive::PScalar;
19use crate::primitive::PVectorMut;
20
21/// An immutable vector of generic primitive values.
22///
23/// `T` is expected to be bound by [`NativePType`], which templates an internal [`Buffer<T>`] that
24/// stores the elements of the vector.
25#[derive(Default, Debug, Clone)]
26pub struct PVector<T> {
27    /// The buffer representing the vector elements.
28    pub(super) elements: Buffer<T>,
29    /// The validity mask (where `true` represents an element is **not** null).
30    pub(super) validity: Mask,
31}
32
33impl<T: NativePType + PartialEq> PartialEq for PVector<T> {
34    fn eq(&self, other: &Self) -> bool {
35        if self.len() != other.len() {
36            return false;
37        }
38        // Validity patterns must match
39        if self.validity != other.validity {
40            return false;
41        }
42        // Compare all elements, OR with !validity to ignore invalid positions
43        self.elements
44            .iter()
45            .zip(other.elements.iter())
46            .enumerate()
47            .all(|(i, (a, b))| !self.validity.value(i) | (a == b))
48    }
49}
50
51impl<T: NativePType + Eq> Eq for PVector<T> {}
52
53impl<T> PVector<T> {
54    /// Creates a new [`PVector<T>`] from the given elements buffer and validity mask.
55    ///
56    /// # Panics
57    ///
58    /// Panics if the length of the validity mask does not match the length of the elements buffer.
59    pub fn new(elements: Buffer<T>, validity: Mask) -> Self {
60        Self::try_new(elements, validity).vortex_expect("Failed to create `PVector`")
61    }
62
63    /// Tries to create a new [`PVector<T>`] from the given elements buffer and validity mask.
64    ///
65    /// # Errors
66    ///
67    /// Returns an error if the length of the validity mask does not match the length of the
68    /// elements buffer.
69    pub fn try_new(elements: Buffer<T>, validity: Mask) -> VortexResult<Self> {
70        vortex_ensure!(
71            validity.len() == elements.len(),
72            "`PVector` validity mask must have the same length as elements"
73        );
74        Ok(Self { elements, validity })
75    }
76
77    /// Creates a new [`PVector<T>`] from the given elements buffer and validity mask without
78    /// validation.
79    ///
80    /// # Safety
81    ///
82    /// The caller must ensure that the validity mask has the same length as the elements buffer.
83    pub unsafe fn new_unchecked(elements: Buffer<T>, validity: Mask) -> Self {
84        if cfg!(debug_assertions) {
85            Self::new(elements, validity)
86        } else {
87            Self { elements, validity }
88        }
89    }
90
91    /// Decomposes the primitive vector into its constituent parts (buffer and validity).
92    pub fn into_parts(self) -> (Buffer<T>, Mask) {
93        (self.elements, self.validity)
94    }
95
96    /// Decomposes the primitive vector into its internal buffer, consuming the vector.
97    ///
98    /// # Panics
99    ///
100    /// Panics if there are any null values in the vector.
101    pub fn into_nonnull_buffer(self) -> Buffer<T> {
102        assert!(
103            self.validity.all_true(),
104            "Cannot convert to buffer: vector contains null values"
105        );
106        self.elements
107    }
108
109    /// Decomposes the primitive vector into its constituent parts by mutable reference.
110    ///
111    /// # Safety
112    ///
113    /// The caller must ensure that no other references to the internal parts exist while mutable
114    pub unsafe fn as_parts_mut(&mut self) -> (&mut Buffer<T>, &mut Mask) {
115        (&mut self.elements, &mut self.validity)
116    }
117
118    /// Gets a nullable element at the given index, panicking on out-of-bounds.
119    ///
120    /// If the element at the given index is null, returns `None`. Otherwise, returns `Some(x)`,
121    /// where `x: T`.
122    ///
123    /// Note that this `get` method is different from the standard library [`slice::get`], which
124    /// returns `None` if the index is out of bounds. This method will panic if the index is out of
125    /// bounds, and return `None` if the elements is null.
126    ///
127    /// # Panics
128    ///
129    /// Panics if the index is out of bounds.
130    pub fn get(&self, index: usize) -> Option<&T> {
131        self.validity.value(index).then(|| &self.elements[index])
132    }
133
134    /// Gets an element at the given index and converts it to type `U`.
135    ///
136    /// Returns `None` if:
137    /// - The element at the given index is null
138    /// - The conversion from `T` to `U` fails
139    ///
140    /// # Panics
141    ///
142    /// Panics if the index is out of bounds.
143    pub fn get_as<U>(&self, index: usize) -> Option<U>
144    where
145        U: TryFrom<T>,
146        T: Copy,
147    {
148        self.get(index).and_then(|&v| U::try_from(v).ok())
149    }
150
151    /// Returns the internal [`Buffer`] of the [`PVector`].
152    ///
153    /// Note that the internal buffer may hold garbage data in place of nulls. That information is
154    /// tracked by the [`validity()`](Self::validity).
155    #[inline]
156    pub fn elements(&self) -> &Buffer<T> {
157        &self.elements
158    }
159
160    /// Transmute a `PVector<T>` into a `PVector<U>`.
161    ///
162    /// # Safety
163    ///
164    /// The caller must ensure that all values of type `T` in this vector are valid as type `U`.
165    /// See [`std::mem::transmute`] for more details.
166    ///
167    /// # Panics
168    ///
169    /// Panics if the type `U` does not have the same size and alignment as `T`.
170    pub unsafe fn transmute<U: NativePType>(self) -> PVector<U> {
171        let (buffer, mask) = self.into_parts();
172
173        // SAFETY: same guarantees as this function.
174        let buffer = unsafe { buffer.transmute::<U>() };
175
176        PVector::new(buffer, mask)
177    }
178}
179
180impl<T: NativePType> AsRef<[T]> for PVector<T> {
181    /// Returns an immutable slice over the internal buffer with elements of type `T`.
182    ///
183    /// Note that this slice may contain garbage data where the [`validity()`] mask states that an
184    /// element is invalid.
185    ///
186    /// The caller should check the [`validity()`] before performing any operations.
187    ///
188    /// [`validity()`]: crate::VectorOps::validity
189    #[inline]
190    fn as_ref(&self) -> &[T] {
191        self.elements.as_slice()
192    }
193}
194
195impl<T: NativePType> VectorOps for PVector<T> {
196    type Mutable = PVectorMut<T>;
197    type Scalar = PScalar<T>;
198
199    fn len(&self) -> usize {
200        self.elements.len()
201    }
202
203    fn validity(&self) -> &Mask {
204        &self.validity
205    }
206
207    fn mask_validity(&mut self, mask: &Mask) {
208        self.validity = self.validity.bitand(mask);
209    }
210
211    fn scalar_at(&self, index: usize) -> PScalar<T> {
212        assert!(index < self.len(), "Index out of bounds in `PVector`");
213        PScalar::<T>::new(self.validity.value(index).then(|| self.elements[index]))
214    }
215
216    fn slice(&self, range: impl RangeBounds<usize> + Clone + Debug) -> Self {
217        let elements = self.elements.slice(range.clone());
218        let validity = self.validity.slice(range);
219        Self::new(elements, validity)
220    }
221
222    fn clear(&mut self) {
223        self.elements.clear();
224        self.validity.clear();
225    }
226
227    /// Try to convert self into a mutable vector.
228    fn try_into_mut(self) -> Result<PVectorMut<T>, Self> {
229        let elements = match self.elements.try_into_mut() {
230            Ok(elements) => elements,
231            Err(elements) => {
232                return Err(Self {
233                    elements,
234                    validity: self.validity,
235                });
236            }
237        };
238
239        match self.validity.try_into_mut() {
240            Ok(validity_mut) => Ok(PVectorMut {
241                elements,
242                validity: validity_mut,
243            }),
244            Err(validity) => Err(Self {
245                elements: elements.freeze(),
246                validity,
247            }),
248        }
249    }
250
251    fn into_mut(self) -> PVectorMut<T> {
252        let elements = self.elements.into_mut();
253        let validity = self.validity.into_mut();
254
255        PVectorMut { elements, validity }
256    }
257}
258
259impl<T: NativePType> From<Buffer<T>> for PVector<T> {
260    /// Creates a new [`PVector<T>`] from the given elements buffer, with an all valid validity.
261    fn from(value: Buffer<T>) -> Self {
262        let len = value.len();
263        Self {
264            elements: value,
265            validity: Mask::new_true(len),
266        }
267    }
268}