vortex_array/pipeline/
vec.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Vectors contain owned fixed-size canonical arrays of elements.
5//!
6
7// TODO(ngates): Currently, the data in a vector is Arc'd. We should consider whether we want the
8//  performance hit for as_mut(), or whether we want zero-copy cloning. Not clear that we ever
9//  need the clone behavior.
10
11use std::cell::{Ref, RefMut};
12use std::fmt::Debug;
13use std::ops::{Deref, DerefMut};
14
15use vortex_buffer::{Alignment, ByteBuffer, ByteBufferMut};
16
17use crate::pipeline::N;
18use crate::pipeline::bits::BitVector;
19use crate::pipeline::types::{Element, VType};
20use crate::pipeline::view::{View, ViewMut};
21
/// A vector contains fixed-size owned data in canonical form.
///
/// The element storage is an untyped byte buffer; `vtype` records the physical element type and
/// typed access goes through [`Vector::as_mut_array`], [`Vector::as_view`] and
/// [`Vector::as_view_mut`].
#[derive(Debug)]
pub struct Vector {
    /// The physical type of the vector, which defines how the elements are stored.
    vtype: VType,
    /// The allocated elements buffer.
    /// Alignment is at least the size of the element type.
    /// The capacity of the elements buffer is N * `size_of::<T>()` where T is the element type.
    elements: ByteBufferMut,
    /// The validity mask for the vector, indicating which elements in the buffer are valid.
    validity: BitVector,
    /// The length of the valid values in the vector.
    /// Starts at zero and is kept at or below `N` by [`Vector::set_len`].
    len: usize,

    /// Additional buffers of data used by the vector, such as string data.
    // TODO(ngates): ideally these buffers are compressed somehow? E.g. using FSST?
    #[allow(dead_code)]
    data: Vec<ByteBuffer>,
}
41
42impl Vector {
43    pub fn new<T: Element>() -> Self {
44        Self::new_with_vtype(T::vtype())
45    }
46
47    pub fn new_with_vtype(vtype: VType) -> Self {
48        let mut elements = ByteBufferMut::with_capacity_aligned(
49            vtype.byte_width() * N,
50            Alignment::new(vtype.byte_width()),
51        );
52        unsafe { elements.set_len(vtype.byte_width() * N) };
53
54        Self {
55            vtype,
56            elements,
57            validity: BitVector::full().clone(),
58            len: 0,
59            data: vec![],
60        }
61    }
62
63    #[inline(always)]
64    pub fn len(&self) -> usize {
65        self.len
66    }
67
68    pub fn is_empty(&self) -> bool {
69        self.len == 0
70    }
71
72    pub fn set_len(&mut self, len: usize) {
73        assert!(len <= N, "Length cannot exceed the capacity of the vector");
74        self.len = len;
75    }
76
77    pub fn as_mut_array<T: Element>(&mut self) -> &mut [T; N] {
78        assert_eq!(self.vtype, T::vtype());
79        unsafe { &mut *(self.elements.as_mut_ptr().cast::<T>().cast::<[T; N]>()) }
80    }
81
82    pub fn as_view_mut(&mut self) -> ViewMut<'_> {
83        ViewMut {
84            vtype: self.vtype,
85            elements: self.elements.as_mut_ptr().cast(),
86            validity: Some(self.validity.as_view_mut()),
87            data: vec![],
88            len: self.len,
89            _marker: Default::default(),
90        }
91    }
92
93    pub fn as_view(&self) -> View<'_> {
94        View {
95            vtype: self.vtype,
96            elements: self.elements.as_ptr().cast(),
97            validity: Some(self.validity.as_view()),
98            len: self.len,
99            data: vec![],
100            _marker: Default::default(),
101        }
102    }
103}
104
/// A [`VectorRef`] provides a small wrapper to allow accessing a [`View`] with the same lifetime
/// as the borrowed vector, rather than the lifetime of the [`Ref`].
pub struct VectorRef<'a> {
    // Never read directly: held so that the borrow of the vector stays active for as long as
    // `view`, which was created from it, is alive.
    #[allow(dead_code)]
    borrow: Ref<'a, Vector>,
    // View created from `borrow`, with its lifetime widened to `'a` in `VectorRef::new`.
    view: View<'a>,
}
113
114impl<'a> VectorRef<'a> {
115    pub fn new(borrow: Ref<'a, Vector>) -> Self {
116        let view = borrow.as_view();
117        // SAFETY: we continue to hold onto the [`Ref`], so it is safe to erase the lifetime.
118        let view = unsafe { std::mem::transmute::<View<'_>, View<'a>>(view) };
119        Self { borrow, view }
120    }
121
122    pub fn as_view(&self) -> &View<'a> {
123        &self.view
124    }
125}
126
127impl<'a> Deref for VectorRef<'a> {
128    type Target = View<'a>;
129
130    fn deref(&self) -> &Self::Target {
131        &self.view
132    }
133}
134
/// A [`VectorRefMut`] provides a small wrapper to allow accessing a [`ViewMut`] with the same
/// lifetime as the borrowed vector, rather than the lifetime of the [`RefMut`].
pub struct VectorRefMut<'a> {
    // Never read directly: held so that the mutable borrow of the vector stays active for as
    // long as `view`, which was created from it, is alive.
    #[allow(dead_code)]
    borrow: RefMut<'a, Vector>,
    // Mutable view created from `borrow`, with its lifetime widened to `'a` in
    // `VectorRefMut::new`.
    view: ViewMut<'a>,
}
143
144impl<'a> VectorRefMut<'a> {
145    pub fn new(mut borrow: RefMut<'a, Vector>) -> Self {
146        let view = borrow.as_view_mut();
147        // SAFETY: we continue to hold onto the [`Ref`], so it is safe to erase the lifetime.
148        let view = unsafe { std::mem::transmute::<ViewMut<'_>, ViewMut<'a>>(view) };
149        Self { borrow, view }
150    }
151}
152
153impl<'a> Deref for VectorRefMut<'a> {
154    type Target = ViewMut<'a>;
155
156    fn deref(&self) -> &Self::Target {
157        &self.view
158    }
159}
160
161impl<'a> DerefMut for VectorRefMut<'a> {
162    fn deref_mut(&mut self) -> &mut Self::Target {
163        &mut self.view
164    }
165}