vortex_array/pipeline/
view.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use vortex_buffer::ByteBuffer;
5use vortex_error::VortexExpect;
6
7use crate::pipeline::N;
8use crate::pipeline::bits::{BitView, BitViewMut};
9use crate::pipeline::types::{Element, VType};
10use crate::pipeline::vec::Selection;
11
/// A read-only view over the contents of a pipeline vector.
///
/// The element buffer is held as an untyped pointer; `vtype` records which element type it
/// is currently interpreted as.
pub struct View<'a> {
    /// The physical type of the vector, which defines how the elements are stored.
    pub(super) vtype: VType,
    /// A pointer to the allocated elements buffer.
    /// Alignment is at least the size of the element type.
    /// The capacity of the elements buffer is N * `size_of::<T>()` where T is the element type.
    pub(super) elements: *const u8,
    /// The validity mask for the vector, indicating which elements in the buffer are valid.
    /// This value can be `None` if the expected DType is `NonNullable`.
    // TODO: support validity
    #[allow(dead_code)]
    pub(super) validity: Option<BitView<'a>>,

    /// Indicates where the selected elements are positioned within the vector.
    pub(super) selection: Selection,

    /// Additional buffers of data used by the vector, such as string data.
    #[allow(dead_code)]
    pub(super) data: Vec<ByteBuffer>,

    /// Marker defining the lifetime of the contents of the vector.
    pub(super) _marker: std::marker::PhantomData<&'a ()>,
}
35
36impl<'a> View<'a> {
37    #[inline(always)]
38    pub fn selection(&self) -> Selection {
39        self.selection
40    }
41
42    pub fn as_array<T>(&self) -> &'a [T; N]
43    where
44        T: Element,
45    {
46        debug_assert_eq!(self.vtype, T::vtype(), "Invalid type for canonical view");
47        // SAFETY: We assume that the elements are of type T and that the view is valid.
48        unsafe { &*(self.elements.cast::<T>() as *const [T; N]) }
49    }
50
51    /// Re-interpret cast the vector into a new type where the element has the same width.
52    #[inline(always)]
53    pub fn reinterpret_as<E: Element>(&mut self) {
54        assert_eq!(
55            self.vtype.byte_width(),
56            size_of::<E>(),
57            "Cannot reinterpret {} as {}",
58            self.vtype,
59            E::vtype()
60        );
61        self.vtype = E::vtype();
62    }
63}
64
/// A mutable view over the contents of a pipeline vector.
///
/// The element buffer is held as an untyped pointer; `vtype` records which element type it
/// is currently interpreted as.
pub struct ViewMut<'a> {
    /// The physical type of the vector, which defines how the elements are stored.
    pub(super) vtype: VType,
    /// A pointer to the allocated elements buffer.
    /// Alignment is at least the size of the element type.
    /// The capacity of the elements buffer is N * `size_of::<T>()` where T is the element type.
    // TODO(ngates): it would be nice to guarantee _wider_ alignment, ideally 128 bytes, so that
    //  we can use aligned load/store instructions for wide SIMD lanes.
    pub(super) elements: *mut u8,
    /// The validity mask for the vector, indicating which elements in the buffer are valid.
    /// This value can be `None` if the expected DType is `NonNullable`.
    pub(super) validity: Option<BitViewMut<'a>>,

    /// Additional buffers of data used by the vector, such as string data.
    // TODO(ngates): ideally these buffers are compressed somehow? E.g. using FSST?
    #[allow(dead_code)]
    pub(super) data: Vec<ByteBuffer>,

    /// The position of the selected values of this buffer.
    /// One of:
    /// * All - all N values are selected.
    /// * Prefix - the first n values are selected where n is the true count of the kernel mask.
    /// * Mask - the values are in the positions indicated by the kernel mask.
    pub(super) selection: Selection,

    /// Marker defining the lifetime of the contents of the vector.
    pub(super) _marker: std::marker::PhantomData<&'a mut ()>,
}
93
94impl<'a> ViewMut<'a> {
95    pub fn new<E: Element>(elements: &'a mut [E], validity: Option<BitViewMut<'a>>) -> Self {
96        assert_eq!(elements.len(), N);
97        Self {
98            vtype: E::vtype(),
99            elements: elements.as_mut_ptr().cast(),
100            validity,
101            data: vec![],
102            selection: Selection::Prefix,
103            _marker: Default::default(),
104        }
105    }
106
107    /// Re-interpret cast the vector into a new type where the element has the same width.
108    #[inline(always)]
109    pub fn reinterpret_as<E: Element>(&mut self) {
110        assert_eq!(
111            self.vtype.byte_width(),
112            size_of::<E>(),
113            "Cannot reinterpret {} as {}",
114            self.vtype,
115            E::vtype()
116        );
117        self.vtype = E::vtype();
118    }
119
120    /// Returns an immutable array of the elements in the vector.
121    #[inline(always)]
122    pub fn as_array<E: Element>(&self) -> &'a [E; N] {
123        debug_assert_eq!(self.vtype, E::vtype(), "Invalid type for canonical view");
124        unsafe { &*(self.elements.cast::<E>() as *const [E; N]) }
125    }
126
127    /// Returns a mutable array of the elements in the vector, allowing for modification.
128    #[inline(always)]
129    pub fn as_array_mut<E: Element>(&mut self) -> &'a mut [E; N] {
130        debug_assert_eq!(self.vtype, E::vtype(), "Invalid type for canonical view");
131        unsafe { &mut *(self.elements.cast::<E>() as *mut [E; N]) }
132    }
133
134    /// Access the validity mask of the vector.
135    ///
136    /// ## Panics
137    ///
138    /// Panics if the vector does not support validity, i.e. if the DType was non-nullable when
139    /// it was created.
140    pub fn validity(&mut self) -> &mut BitViewMut<'a> {
141        self.validity
142            .as_mut()
143            .vortex_expect("Vector does not support validity")
144    }
145
146    pub fn add_buffer(&mut self, buffer: ByteBuffer) {
147        self.data.push(buffer);
148    }
149
150    #[inline(always)]
151    pub fn selection(&self) -> Selection {
152        self.selection
153    }
154
155    pub fn set_selection(&mut self, selection: Selection) {
156        self.selection = selection;
157    }
158
159    /// Flatten the view by bringing the selected elements of the mask to the beginning of
160    pub fn flatten<E: Element>(&mut self, selection: &BitView<'_>) {
161        assert_eq!(
162            self.vtype,
163            E::vtype(),
164            "ViewMut::flatten_mask: type mismatch"
165        );
166
167        if matches!(self.selection, Selection::Prefix) {
168            // Nothing to do, all elements are already selected.
169            return;
170        }
171
172        match selection.true_count() {
173            0 | N => {
174                // If the mask has no true bits or all true bits, we are already flattened.
175            }
176            n if n > 3 * N / 4 => {
177                // High density: use iter_zeros to compact by removing gaps
178                let slice = self.as_array_mut::<E>();
179                let mut write_idx = 0;
180                let mut read_idx = 0;
181
182                selection.iter_zeros(|zero_idx| {
183                    // Copy elements from read_idx to zero_idx (exclusive) to write_idx
184                    let count = zero_idx - read_idx;
185                    unsafe {
186                        // SAFETY: We assume that the elements are of type E and that the view is valid.
187                        // Using memmove for potentially overlapping regions
188                        std::ptr::copy(
189                            slice.as_ptr().add(read_idx),
190                            slice.as_mut_ptr().add(write_idx),
191                            count,
192                        );
193                        write_idx += count;
194                    }
195                    read_idx = zero_idx + 1;
196                });
197
198                // Copy any remaining elements after the last zero
199                unsafe {
200                    std::ptr::copy(
201                        slice.as_ptr().add(read_idx),
202                        slice.as_mut_ptr().add(write_idx),
203                        N - read_idx,
204                    );
205                }
206            }
207            _ => {
208                let mut offset = 0;
209                let slice = self.as_array_mut::<E>();
210                selection.iter_ones(|idx| {
211                    unsafe {
212                        // SAFETY: We assume that the elements are of type E and that the view is valid.
213                        let value = *slice.get_unchecked(idx);
214                        // TODO(joe): use ptr increment (not offset).
215                        *slice.get_unchecked_mut(offset) = value;
216
217                        offset += 1;
218                    }
219                });
220            }
221        }
222
223        self.selection = Selection::Prefix
224    }
225}