vortex_array/pipeline/
vec.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Vectors contain owned fixed-size canonical arrays of elements.
5//!
6
7// TODO(ngates): Currently, the data in a vector is Arc'd. We should consider whether we want the
8//  performance hit for as_mut(), or whether we want zero-copy cloning. Not clear that we ever
9//  need the clone behavior.
10
11use std::cell::{Ref, RefMut};
12use std::fmt::Debug;
13use std::ops::{Deref, DerefMut};
14
15use vortex_buffer::{Alignment, ByteBuffer, ByteBufferMut};
16
17use crate::pipeline::N;
18use crate::pipeline::bits::BitVector;
19use crate::pipeline::types::{Element, VType};
20use crate::pipeline::view::{View, ViewMut};
21
/// Identifier for a vector in the pipeline execution context.
///
/// This is a thin newtype over `usize`; it dereferences to the wrapped value so it can be
/// read wherever a plain `usize` is expected.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct VectorId(pub(crate) usize);

impl Deref for VectorId {
    type Target = usize;

    /// Borrows the wrapped `usize`.
    fn deref(&self) -> &usize {
        let Self(raw) = self;
        raw
    }
}
33
34/// A vector contains fixed-size owned data in canonical form.
35#[derive(Debug)]
36pub struct Vector {
37    /// The physical type of the vector, which defines how the elements are stored.
38    vtype: VType,
39    /// The allocated elements buffer.
40    /// Alignment is at least the size of the element type.
41    /// The capacity of the elements buffer is N * `size_of::<T>()` where T is the element type.
42    elements: ByteBufferMut,
43    /// The validity mask for the vector, indicating which elements in the buffer are valid.
44    validity: BitVector,
45    // The length of the valid values in the vector.
46    len: usize,
47
48    /// Additional buffers of data used by the vector, such as string data.
49    // TODO(ngates): ideally these buffers are compressed somehow? E.g. using FSST?
50    #[allow(dead_code)]
51    data: Vec<ByteBuffer>,
52}
53
54impl Vector {
55    pub fn new<T: Element>() -> Self {
56        Self::new_with_vtype(T::vtype())
57    }
58
59    pub fn new_with_vtype(vtype: VType) -> Self {
60        let mut elements = ByteBufferMut::with_capacity_aligned(
61            vtype.byte_width() * N,
62            Alignment::new(vtype.byte_width()),
63        );
64        unsafe { elements.set_len(vtype.byte_width() * N) };
65
66        Self {
67            vtype,
68            elements,
69            validity: BitVector::full().clone(),
70            len: 0,
71            data: vec![],
72        }
73    }
74
75    #[inline(always)]
76    pub fn len(&self) -> usize {
77        self.len
78    }
79
80    pub fn is_empty(&self) -> bool {
81        self.len == 0
82    }
83
84    pub fn set_len(&mut self, len: usize) {
85        assert!(len <= N, "Length cannot exceed the capacity of the vector");
86        self.len = len;
87    }
88
89    pub fn as_mut_array<T: Element>(&mut self) -> &mut [T; N] {
90        assert_eq!(self.vtype, T::vtype());
91        unsafe { &mut *(self.elements.as_mut_ptr().cast::<T>().cast::<[T; N]>()) }
92    }
93
94    pub fn as_view_mut(&mut self) -> ViewMut<'_> {
95        ViewMut {
96            vtype: self.vtype,
97            elements: self.elements.as_mut_ptr().cast(),
98            validity: Some(self.validity.as_view_mut()),
99            data: vec![],
100            _marker: Default::default(),
101        }
102    }
103
104    pub fn as_view(&self) -> View<'_> {
105        View {
106            vtype: self.vtype,
107            elements: self.elements.as_ptr().cast(),
108            validity: Some(self.validity.as_view()),
109            len: self.len,
110            data: vec![],
111            _marker: Default::default(),
112        }
113    }
114}
115
116/// A [`VectorRef`] provides a small wrapper to allow accessing a [`View`] with the same lifetime
117/// as the borrowed vector, rather than the lifetime of the [`Ref`].
118pub struct VectorRef<'a> {
119    // Use to ensure that view and borrow have the same lifetime.
120    #[allow(dead_code)]
121    borrow: Ref<'a, Vector>,
122    view: View<'a>,
123}
124
125impl<'a> VectorRef<'a> {
126    pub fn new(borrow: Ref<'a, Vector>) -> Self {
127        let view = borrow.as_view();
128        // SAFETY: we continue to hold onto the [`Ref`], so it is safe to erase the lifetime.
129        let view = unsafe { std::mem::transmute::<View<'_>, View<'a>>(view) };
130        Self { borrow, view }
131    }
132
133    pub fn as_view(&self) -> &View<'a> {
134        &self.view
135    }
136}
137
138impl<'a> Deref for VectorRef<'a> {
139    type Target = View<'a>;
140
141    fn deref(&self) -> &Self::Target {
142        &self.view
143    }
144}
145
146/// A [`VectorRefMut`] provides a small wrapper to allow accessing a [`ViewMut`] with the same
147/// lifetime as the borrowed vector, rather than the lifetime of the [`RefMut`].
148pub struct VectorRefMut<'a> {
149    // Use to ensure that view and borrow have the same lifetime.
150    #[allow(dead_code)]
151    borrow: RefMut<'a, Vector>,
152    view: ViewMut<'a>,
153}
154
155impl<'a> VectorRefMut<'a> {
156    pub fn new(mut borrow: RefMut<'a, Vector>) -> Self {
157        let view = borrow.as_view_mut();
158        // SAFETY: we continue to hold onto the [`Ref`], so it is safe to erase the lifetime.
159        let view = unsafe { std::mem::transmute::<ViewMut<'_>, ViewMut<'a>>(view) };
160        Self { borrow, view }
161    }
162}
163
164impl<'a> Deref for VectorRefMut<'a> {
165    type Target = ViewMut<'a>;
166
167    fn deref(&self) -> &Self::Target {
168        &self.view
169    }
170}
171
172impl<'a> DerefMut for VectorRefMut<'a> {
173    fn deref_mut(&mut self) -> &mut Self::Target {
174        &mut self.view
175    }
176}