vortex_array/pipeline/view.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use vortex_buffer::ByteBuffer;
5use vortex_error::VortexExpect;
6
7use crate::pipeline::N;
8use crate::pipeline::bits::{BitView, BitViewMut};
9use crate::pipeline::types::{Element, VType};
10use crate::pipeline::vec::Selection;
11
/// An immutable view of a pipeline vector.
///
/// The view bundles a physical type tag, a raw pointer to the element storage, an optional
/// validity mask, the current selection, and any auxiliary data buffers. The `'a` lifetime
/// is carried by the validity mask and by the `_marker` field.
pub struct View<'a> {
    /// The physical type of the vector, which defines how the elements are stored.
    pub(super) vtype: VType,
    /// A pointer to the allocated elements buffer.
    /// Alignment is at least the size of the element type.
    /// The capacity of the elements buffer is N * `size_of::<T>()` where T is the element type.
    pub(super) elements: *const u8,
    /// The validity mask for the vector, indicating which elements in the buffer are valid.
    /// This value can be `None` if the expected DType is `NonNullable`.
    // TODO: support validity
    #[allow(dead_code)]
    pub(super) validity: Option<BitView<'a>>,

    /// Indicates where the selected elements are positioned within the vector.
    pub(super) selection: Selection,

    /// Additional buffers of data used by the vector, such as string data.
    #[allow(dead_code)]
    pub(super) data: Vec<ByteBuffer>,

    /// Marker defining the lifetime of the contents of the vector.
    ///
    /// `elements` is a raw pointer and therefore carries no lifetime of its own.
    pub(super) _marker: std::marker::PhantomData<&'a ()>,
}
35
36impl<'a> View<'a> {
37 #[inline(always)]
38 pub fn selection(&self) -> Selection {
39 self.selection
40 }
41
42 pub fn as_array<T>(&self) -> &'a [T; N]
43 where
44 T: Element,
45 {
46 debug_assert_eq!(self.vtype, T::vtype(), "Invalid type for canonical view");
47 // SAFETY: We assume that the elements are of type T and that the view is valid.
48 unsafe { &*(self.elements.cast::<T>() as *const [T; N]) }
49 }
50
51 /// Re-interpret cast the vector into a new type where the element has the same width.
52 #[inline(always)]
53 pub fn reinterpret_as<E: Element>(&mut self) {
54 assert_eq!(
55 self.vtype.byte_width(),
56 size_of::<E>(),
57 "Cannot reinterpret {} as {}",
58 self.vtype,
59 E::vtype()
60 );
61 self.vtype = E::vtype();
62 }
63}
64
/// A mutable view of a pipeline vector.
///
/// The view bundles a physical type tag, a raw mutable pointer to the element storage, an
/// optional mutable validity mask, any auxiliary data buffers, and the current selection.
/// The `'a` lifetime is carried by the validity mask and by the `_marker` field.
pub struct ViewMut<'a> {
    /// The physical type of the vector, which defines how the elements are stored.
    pub(super) vtype: VType,
    /// A pointer to the allocated elements buffer.
    /// Alignment is at least the size of the element type.
    /// The capacity of the elements buffer is N * `size_of::<T>()` where T is the element type.
    // TODO(ngates): it would be nice to guarantee _wider_ alignment, ideally 128 bytes, so that
    //  we can use aligned load/store instructions for wide SIMD lanes.
    pub(super) elements: *mut u8,
    /// The validity mask for the vector, indicating which elements in the buffer are valid.
    /// This value can be `None` if the expected DType is `NonNullable`.
    pub(super) validity: Option<BitViewMut<'a>>,

    /// Additional buffers of data used by the vector, such as string data.
    // TODO(ngates): ideally these buffers are compressed somehow? E.g. using FSST?
    #[allow(dead_code)]
    pub(super) data: Vec<ByteBuffer>,

    /// The position of the selected values of this buffer.
    /// One of:
    /// * All - all N values are selected.
    /// * Prefix - the first n values are selected where n is the true count of the kernel mask.
    /// * Mask - the values are in the positions indicated by the kernel mask.
    pub(super) selection: Selection,

    /// Marker defining the lifetime of the contents of the vector.
    ///
    /// `elements` is a raw pointer and therefore carries no lifetime of its own.
    pub(super) _marker: std::marker::PhantomData<&'a mut ()>,
}
93
94impl<'a> ViewMut<'a> {
95 pub fn new<E: Element>(elements: &'a mut [E], validity: Option<BitViewMut<'a>>) -> Self {
96 assert_eq!(elements.len(), N);
97 Self {
98 vtype: E::vtype(),
99 elements: elements.as_mut_ptr().cast(),
100 validity,
101 data: vec![],
102 selection: Selection::Prefix,
103 _marker: Default::default(),
104 }
105 }
106
107 /// Re-interpret cast the vector into a new type where the element has the same width.
108 #[inline(always)]
109 pub fn reinterpret_as<E: Element>(&mut self) {
110 assert_eq!(
111 self.vtype.byte_width(),
112 size_of::<E>(),
113 "Cannot reinterpret {} as {}",
114 self.vtype,
115 E::vtype()
116 );
117 self.vtype = E::vtype();
118 }
119
120 /// Returns an immutable array of the elements in the vector.
121 #[inline(always)]
122 pub fn as_array<E: Element>(&self) -> &'a [E; N] {
123 debug_assert_eq!(self.vtype, E::vtype(), "Invalid type for canonical view");
124 unsafe { &*(self.elements.cast::<E>() as *const [E; N]) }
125 }
126
127 /// Returns a mutable array of the elements in the vector, allowing for modification.
128 #[inline(always)]
129 pub fn as_array_mut<E: Element>(&mut self) -> &'a mut [E; N] {
130 debug_assert_eq!(self.vtype, E::vtype(), "Invalid type for canonical view");
131 unsafe { &mut *(self.elements.cast::<E>() as *mut [E; N]) }
132 }
133
134 /// Access the validity mask of the vector.
135 ///
136 /// ## Panics
137 ///
138 /// Panics if the vector does not support validity, i.e. if the DType was non-nullable when
139 /// it was created.
140 pub fn validity(&mut self) -> &mut BitViewMut<'a> {
141 self.validity
142 .as_mut()
143 .vortex_expect("Vector does not support validity")
144 }
145
146 pub fn add_buffer(&mut self, buffer: ByteBuffer) {
147 self.data.push(buffer);
148 }
149
150 #[inline(always)]
151 pub fn selection(&self) -> Selection {
152 self.selection
153 }
154
155 pub fn set_selection(&mut self, selection: Selection) {
156 self.selection = selection;
157 }
158
159 /// Flatten the view by bringing the selected elements of the mask to the beginning of
160 pub fn flatten<E: Element>(&mut self, selection: &BitView<'_>) {
161 assert_eq!(
162 self.vtype,
163 E::vtype(),
164 "ViewMut::flatten_mask: type mismatch"
165 );
166
167 if matches!(self.selection, Selection::Prefix) {
168 // Nothing to do, all elements are already selected.
169 return;
170 }
171
172 match selection.true_count() {
173 0 | N => {
174 // If the mask has no true bits or all true bits, we are already flattened.
175 }
176 n if n > 3 * N / 4 => {
177 // High density: use iter_zeros to compact by removing gaps
178 let slice = self.as_array_mut::<E>();
179 let mut write_idx = 0;
180 let mut read_idx = 0;
181
182 selection.iter_zeros(|zero_idx| {
183 // Copy elements from read_idx to zero_idx (exclusive) to write_idx
184 let count = zero_idx - read_idx;
185 unsafe {
186 // SAFETY: We assume that the elements are of type E and that the view is valid.
187 // Using memmove for potentially overlapping regions
188 std::ptr::copy(
189 slice.as_ptr().add(read_idx),
190 slice.as_mut_ptr().add(write_idx),
191 count,
192 );
193 write_idx += count;
194 }
195 read_idx = zero_idx + 1;
196 });
197
198 // Copy any remaining elements after the last zero
199 unsafe {
200 std::ptr::copy(
201 slice.as_ptr().add(read_idx),
202 slice.as_mut_ptr().add(write_idx),
203 N - read_idx,
204 );
205 }
206 }
207 _ => {
208 let mut offset = 0;
209 let slice = self.as_array_mut::<E>();
210 selection.iter_ones(|idx| {
211 unsafe {
212 // SAFETY: We assume that the elements are of type E and that the view is valid.
213 let value = *slice.get_unchecked(idx);
214 // TODO(joe): use ptr increment (not offset).
215 *slice.get_unchecked_mut(offset) = value;
216
217 offset += 1;
218 }
219 });
220 }
221 }
222
223 self.selection = Selection::Prefix
224 }
225}