vortex_array/pipeline/
view.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Display;
5
6use vortex_buffer::ByteBuffer;
7use vortex_error::VortexExpect;
8
9use crate::pipeline::N;
10use crate::pipeline::bits::{BitView, BitViewMut};
11use crate::pipeline::types::{Element, VType};
12
13pub struct View<'a> {
14    /// The physical type of the vector, which defines how the elements are stored.
15    pub(super) vtype: VType,
16    /// A pointer to the allocated elements buffer.
17    /// Alignment is at least the size of the element type.
18    /// The capacity of the elements buffer is N * `size_of::<T>()` where T is the element type.
19    pub(super) elements: *const u8,
20    /// The validity mask for the vector, indicating which elements in the buffer are valid.
21    /// This value can be `None` if the expected DType is `NonNullable`.
22    // TODO: support validity
23    #[allow(dead_code)]
24    pub(super) validity: Option<BitView<'a>>,
25    // A selection mask over the elements and validity of the vector.
26    pub(super) len: usize,
27
28    /// Additional buffers of data used by the vector, such as string data.
29    #[allow(dead_code)]
30    pub(super) data: Vec<ByteBuffer>,
31
32    /// Marker defining the lifetime of the contents of the vector.
33    pub(super) _marker: std::marker::PhantomData<&'a ()>,
34}
35
36impl<'a> View<'a> {
37    #[inline(always)]
38    pub fn len(&self) -> usize {
39        self.len
40    }
41
42    pub fn is_empty(&self) -> bool {
43        self.len == 0
44    }
45
46    // FIXME(ngates): we should return &[T; N]
47    pub fn as_slice<T>(&self) -> &'a [T]
48    where
49        T: Element,
50    {
51        debug_assert_eq!(self.vtype, T::vtype(), "Invalid type for canonical view");
52        // SAFETY: We assume that the elements are of type T and that the view is valid.
53        unsafe { std::slice::from_raw_parts(self.elements.cast(), self.len) }
54    }
55
56    /// Re-interpret cast the vector into a new type where the element has the same width.
57    #[inline(always)]
58    pub fn reinterpret_as<E: Element>(&mut self) {
59        assert_eq!(
60            self.vtype.byte_width(),
61            size_of::<E>(),
62            "Cannot reinterpret {} as {}",
63            self.vtype,
64            E::vtype()
65        );
66        self.vtype = E::vtype();
67    }
68}
69
70pub struct ViewMut<'a> {
71    /// The physical type of the vector, which defines how the elements are stored.
72    pub(super) vtype: VType,
73    /// A pointer to the allocated elements buffer.
74    /// Alignment is at least the size of the element type.
75    /// The capacity of the elements buffer is N * `size_of::<T>()` where T is the element type.
76    // TODO(ngates): it would be nice to guarantee _wider_ alignment, ideally 128 bytes, so that
77    //  we can use aligned load/store instructions for wide SIMD lanes.
78    pub(super) elements: *mut u8,
79    /// The validity mask for the vector, indicating which elements in the buffer are valid.
80    /// This value can be `None` if the expected DType is `NonNullable`.
81    pub(super) validity: Option<BitViewMut<'a>>,
82
83    /// Additional buffers of data used by the vector, such as string data.
84    // TODO(ngates): ideally these buffers are compressed somehow? E.g. using FSST?
85    #[allow(dead_code)]
86    pub(super) data: Vec<ByteBuffer>,
87
88    /// The length of the prefix slice containing valid values.
89    pub(super) len: usize,
90
91    /// Marker defining the lifetime of the contents of the vector.
92    pub(super) _marker: std::marker::PhantomData<&'a mut ()>,
93}
94
95impl<'a> ViewMut<'a> {
96    pub fn new<E: Element>(elements: &'a mut [E], validity: Option<BitViewMut<'a>>) -> Self {
97        assert_eq!(elements.len(), N);
98        Self {
99            vtype: E::vtype(),
100            elements: elements.as_mut_ptr().cast(),
101            validity,
102            data: vec![],
103            len: N,
104            _marker: Default::default(),
105        }
106    }
107
108    /// Re-interpret cast the vector into a new type where the element has the same width.
109    #[inline(always)]
110    pub fn reinterpret_as<E: Element>(&mut self) {
111        assert_eq!(
112            self.vtype.byte_width(),
113            size_of::<E>(),
114            "Cannot reinterpret {} as {}",
115            self.vtype,
116            E::vtype()
117        );
118        self.vtype = E::vtype();
119    }
120
121    /// Returns an immutable slice of the elements in the vector.
122    #[inline(always)]
123    pub fn as_slice<E: Element>(&self) -> &'a [E] {
124        debug_assert_eq!(self.vtype, E::vtype(), "Invalid type for canonical view");
125        unsafe { std::slice::from_raw_parts(self.elements.cast::<E>(), self.len) }
126    }
127
128    /// Returns a mutable slice of the elements in the vector, allowing for modification.
129    #[inline(always)]
130    pub fn as_slice_mut<E: Element>(&mut self) -> &'a mut [E] {
131        debug_assert_eq!(self.vtype, E::vtype(), "Invalid type for canonical view");
132        unsafe { std::slice::from_raw_parts_mut(self.elements.cast::<E>(), self.len) }
133    }
134
135    /// Access the validity mask of the vector.
136    ///
137    /// ## Panics
138    ///
139    /// Panics if the vector does not support validity, i.e. if the DType was non-nullable when
140    /// it was created.
141    pub fn validity(&mut self) -> &mut BitViewMut<'a> {
142        self.validity
143            .as_mut()
144            .vortex_expect("Vector does not support validity")
145    }
146
147    pub fn add_buffer(&mut self, buffer: ByteBuffer) {
148        self.data.push(buffer);
149    }
150
151    pub fn set_len(&mut self, len: usize) {
152        assert!(len <= N, "Length cannot exceed the capacity of the vector");
153        self.len = len;
154    }
155
156    /// Flatten the view by bringing the selected elements of the mask to the beginning of
157    /// the elements buffer.
158    ///
159    /// FIXME(ngates): also need to select validity bits.
160    pub fn select_mask<E: Element + Display>(&mut self, mask: &BitView) {
161        assert_eq!(
162            self.vtype,
163            E::vtype(),
164            "ViewMut::flatten_mask: type mismatch"
165        );
166
167        match mask.true_count() {
168            0 => {
169                // If the mask has no true bits, we set the length to 0.
170            }
171            N => {
172                // If the mask has N true bits, we copy all elements.
173            }
174            n if n > 3 * N / 4 => {
175                // High density: use iter_zeros to compact by removing gaps
176                let slice = self.as_slice_mut::<E>();
177                let mut write_idx = 0;
178                let mut read_idx = 0;
179
180                mask.iter_zeros(|zero_idx| {
181                    // Copy elements from read_idx to zero_idx (exclusive) to write_idx
182                    let count = zero_idx - read_idx;
183                    unsafe {
184                        // SAFETY: We assume that the elements are of type E and that the view is valid.
185                        // Using memmove for potentially overlapping regions
186                        std::ptr::copy(
187                            slice.as_ptr().add(read_idx),
188                            slice.as_mut_ptr().add(write_idx),
189                            count,
190                        );
191                        write_idx += count;
192                    }
193                    read_idx = zero_idx + 1;
194                });
195
196                // Copy any remaining elements after the last zero
197                unsafe {
198                    std::ptr::copy(
199                        slice.as_ptr().add(read_idx),
200                        slice.as_mut_ptr().add(write_idx),
201                        N - read_idx,
202                    );
203                }
204            }
205            _ => {
206                let mut offset = 0;
207                let slice = self.as_slice_mut::<E>();
208                mask.iter_ones(|idx| {
209                    unsafe {
210                        // SAFETY: We assume that the elements are of type E and that the view is valid.
211                        let value = *slice.get_unchecked(idx);
212                        // TODO(joe): use ptr increment (not offset).
213                        *slice.get_unchecked_mut(offset) = value;
214
215                        offset += 1;
216                    }
217                });
218            }
219        }
220
221        self.len = mask.true_count();
222    }
223}