vortex_array/pipeline/
vec.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
//! A vector owns a fixed-size array of elements stored in canonical form.
//!
6
// TODO(ngates): Currently, the data in a vector is Arc'd. We should decide whether we accept the
//  performance hit this implies for as_mut(), or whether we actually want zero-copy cloning. It is
//  not clear that we ever need the clone behavior.
10
11use std::cell::{Ref, RefMut};
12use std::fmt::Debug;
13use std::ops::{Deref, DerefMut};
14
15use vortex_buffer::{Alignment, ByteBuffer, ByteBufferMut};
16
17use crate::pipeline::N;
18use crate::pipeline::bits::BitVector;
19use crate::pipeline::types::{Element, VType};
20use crate::pipeline::view::{View, ViewMut};
21
22/// A vector contains fixed-size owned data in canonical form.
23#[derive(Debug)]
24pub struct Vector {
25    /// The physical type of the vector, which defines how the elements are stored.
26    vtype: VType,
27    /// The allocated elements buffer.
28    /// Alignment is at least the size of the element type.
29    /// The capacity of the elements buffer is N * `size_of::<T>()` where T is the element type.
30    elements: ByteBufferMut,
31    /// The validity mask for the vector, indicating which elements in the buffer are valid.
32    validity: BitVector,
33    // The position of the selected values in the vector.
34    selection: Selection,
35
36    /// Additional buffers of data used by the vector, such as string data.
37    // TODO(ngates): ideally these buffers are compressed somehow? E.g. using FSST?
38    #[allow(dead_code)]
39    data: Vec<ByteBuffer>,
40}
41
42impl Vector {
43    pub fn new<T: Element>() -> Self {
44        Self::new_with_vtype(T::vtype())
45    }
46
47    pub fn new_with_vtype(vtype: VType) -> Self {
48        let mut elements = ByteBufferMut::with_capacity_aligned(
49            vtype.byte_width() * N,
50            Alignment::new(vtype.byte_width()),
51        );
52        unsafe { elements.set_len(vtype.byte_width() * N) };
53
54        Self {
55            vtype,
56            elements,
57            validity: BitVector::full().clone(),
58            selection: Selection::Prefix,
59            data: vec![],
60        }
61    }
62
63    pub fn set_selection(&mut self, selection: Selection) {
64        self.selection = selection;
65    }
66
67    pub fn as_mut_array<T: Element>(&mut self) -> &mut [T; N] {
68        assert_eq!(self.vtype, T::vtype());
69        unsafe { &mut *(self.elements.as_mut_ptr().cast::<T>().cast::<[T; N]>()) }
70    }
71
72    pub fn as_view_mut(&mut self) -> ViewMut<'_> {
73        ViewMut {
74            vtype: self.vtype,
75            elements: self.elements.as_mut_ptr().cast(),
76            validity: Some(self.validity.as_view_mut()),
77            data: vec![],
78            selection: self.selection,
79            _marker: Default::default(),
80        }
81    }
82
83    pub fn as_view(&self) -> View<'_> {
84        View {
85            vtype: self.vtype,
86            elements: self.elements.as_ptr().cast(),
87            validity: Some(self.validity.as_view()),
88            selection: self.selection,
89            data: vec![],
90            _marker: Default::default(),
91        }
92    }
93}
94
95/// A [`VectorRef`] provides a small wrapper to allow accessing a [`View`] with the same lifetime
96/// as the borrowed vector, rather than the lifetime of the [`Ref`].
97pub struct VectorRef<'a> {
98    // Use to ensure that view and borrow have the same lifetime.
99    #[allow(dead_code)]
100    borrow: Ref<'a, Vector>,
101    view: View<'a>,
102}
103
104impl<'a> VectorRef<'a> {
105    pub fn new(borrow: Ref<'a, Vector>) -> Self {
106        let view = borrow.as_view();
107        // SAFETY: we continue to hold onto the [`Ref`], so it is safe to erase the lifetime.
108        let view = unsafe { std::mem::transmute::<View<'_>, View<'a>>(view) };
109        Self { borrow, view }
110    }
111
112    pub fn as_view(&self) -> &View<'a> {
113        &self.view
114    }
115}
116
117impl<'a> Deref for VectorRef<'a> {
118    type Target = View<'a>;
119
120    fn deref(&self) -> &Self::Target {
121        &self.view
122    }
123}
124
125/// A [`VectorRefMut`] provides a small wrapper to allow accessing a [`ViewMut`] with the same
126/// lifetime as the borrowed vector, rather than the lifetime of the [`RefMut`].
127pub struct VectorRefMut<'a> {
128    // Use to ensure that view and borrow have the same lifetime.
129    #[allow(dead_code)]
130    borrow: RefMut<'a, Vector>,
131    view: ViewMut<'a>,
132}
133
134impl<'a> VectorRefMut<'a> {
135    pub fn new(mut borrow: RefMut<'a, Vector>) -> Self {
136        let view = borrow.as_view_mut();
137        // SAFETY: we continue to hold onto the [`Ref`], so it is safe to erase the lifetime.
138        let view = unsafe { std::mem::transmute::<ViewMut<'_>, ViewMut<'a>>(view) };
139        Self { borrow, view }
140    }
141}
142
143impl<'a> Deref for VectorRefMut<'a> {
144    type Target = ViewMut<'a>;
145
146    fn deref(&self) -> &Self::Target {
147        &self.view
148    }
149}
150
151impl<'a> DerefMut for VectorRefMut<'a> {
152    fn deref_mut(&mut self) -> &mut Self::Target {
153        &mut self.view
154    }
155}
156
/// Describes the position of the selected values in a vector.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Selection {
    /// The default for a newly created vector.
    ///
    /// NOTE(review): presumably the selected values occupy the leading positions of the
    /// vector — confirm against the pipeline kernels.
    Prefix,
    /// NOTE(review): presumably the selected values sit at the positions flagged by a selection
    /// mask — confirm against the pipeline kernels.
    Mask,
}